// Copyright 2013 the V8 project authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if V8_TARGET_ARCH_ARM64

#include "src/codegen/arm64/assembler-arm64.h"

#include "src/base/bits.h"
#include "src/base/cpu.h"
#include "src/base/small-vector.h"
#include "src/codegen/arm64/assembler-arm64-inl.h"
#include "src/codegen/register-configuration.h"
#include "src/codegen/safepoint-table.h"
#include "src/codegen/string-constants.h"
#include "src/execution/frame-constants.h"

namespace v8 {
namespace internal {

namespace {

#ifdef USE_SIMULATOR
unsigned SimulatorFeaturesFromCommandLine() {
  if (strcmp(FLAG_sim_arm64_optional_features, "none") == 0) {
    return 0;
  }
  if (strcmp(FLAG_sim_arm64_optional_features, "all") == 0) {
    return (1u << NUMBER_OF_CPU_FEATURES) - 1;
  }
  fprintf(
      stderr,
      "Error: unrecognised value for --sim-arm64-optional-features ('%s').\n",
      FLAG_sim_arm64_optional_features);
  fprintf(stderr,
          "Supported values are: none\n"
          "                      all\n");
  FATAL("sim-arm64-optional-features");
}
#endif  // USE_SIMULATOR

constexpr unsigned CpuFeaturesFromCompiler() {
  unsigned features = 0;
#if defined(__ARM_FEATURE_JCVT)
  features |= 1u << JSCVT;
#endif
  return features;
}

constexpr unsigned CpuFeaturesFromTargetOS() {
  unsigned features = 0;
#if defined(V8_TARGET_OS_MACOSX)
  features |= 1u << JSCVT;
#endif
  return features;
}

}  // namespace

// -----------------------------------------------------------------------------
// CpuFeatures implementation.
bool CpuFeatures::SupportsWasmSimd128() { return true; }

void CpuFeatures::ProbeImpl(bool cross_compile) {
  // Only use statically determined features for cross compile (snapshot).
  if (cross_compile) {
    supported_ |= CpuFeaturesFromCompiler();
    supported_ |= CpuFeaturesFromTargetOS();
    return;
  }

  // We used to probe for coherent cache support, but on older CPUs it
  // causes crashes (crbug.com/524337), and newer CPUs don't even have
  // the feature any more.

#ifdef USE_SIMULATOR
  supported_ |= SimulatorFeaturesFromCommandLine();
#else
  // Probe for additional features at runtime.
  base::CPU cpu;
  unsigned runtime = 0;
  if (cpu.has_jscvt()) {
    runtime |= 1u << JSCVT;
  }

  // Use the best of the features found by CPU detection and those inferred
  // from the build system.
  supported_ |= CpuFeaturesFromCompiler();
  supported_ |= runtime;
#endif  // USE_SIMULATOR
  // Set a static value on whether SIMD is supported.
  // This variable is only used for certain archs to query SupportsWasmSimd128()
  // at runtime in builtins using an extern ref. Other callers should use
  // CpuFeatures::SupportsWasmSimd128().
  CpuFeatures::supports_wasm_simd_128_ = CpuFeatures::SupportsWasmSimd128();
}
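
// A rough usage sketch (comment only, not part of the original file): code
// generators typically guard optional instructions on the features probed
// above, assuming the usual CpuFeatures::IsSupported() query, e.g.:
//
//   if (CpuFeatures::IsSupported(JSCVT)) {
//     CpuFeatureScope scope(assembler, JSCVT);
//     // ... emit fjcvtzs instead of a longer fallback sequence ...
//   }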

void CpuFeatures::PrintTarget() {}
void CpuFeatures::PrintFeatures() {}

// -----------------------------------------------------------------------------
// CPURegList utilities.

CPURegister CPURegList::PopLowestIndex() {
  if (IsEmpty()) {
    return NoCPUReg;
  }
  int index = base::bits::CountTrailingZeros(list_);
  DCHECK((1LL << index) & list_);
  Remove(index);
  return CPURegister::Create(index, size_, type_);
}

CPURegister CPURegList::PopHighestIndex() {
  if (IsEmpty()) {
    return NoCPUReg;
  }
  int index = CountLeadingZeros(list_, kRegListSizeInBits);
  index = kRegListSizeInBits - 1 - index;
  DCHECK((1LL << index) & list_);
  Remove(index);
  return CPURegister::Create(index, size_, type_);
}
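
// A minimal sketch (illustration only, assuming a caller that owns a scratch
// list): registers can be drained from a CPURegList in index order, e.g. to
// hand out scratch registers one at a time:
//
//   CPURegList temps = CPURegList::GetCallerSaved(kXRegSizeInBits);
//   while (!temps.IsEmpty()) {
//     CPURegister reg = temps.PopLowestIndex();
//     // ... use 'reg' ...
//   }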

void CPURegList::Align() {
  // Use padreg, if necessary, to maintain stack alignment.
  if (Count() % 2 != 0) {
    if (IncludesAliasOf(padreg)) {
      Remove(padreg);
    } else {
      Combine(padreg);
    }
  }

  DCHECK_EQ(Count() % 2, 0);
}

CPURegList CPURegList::GetCalleeSaved(int size) {
  return CPURegList(CPURegister::kRegister, size, 19, 28);
}

CPURegList CPURegList::GetCalleeSavedV(int size) {
  return CPURegList(CPURegister::kVRegister, size, 8, 15);
}

CPURegList CPURegList::GetCallerSaved(int size) {
  // x18 is the platform register and is reserved for the use of platform ABIs.
  // Registers x0-x17 are caller-saved.
  CPURegList list = CPURegList(CPURegister::kRegister, size, 0, 17);
  return list;
}

CPURegList CPURegList::GetCallerSavedV(int size) {
  // Registers d0-d7 and d16-d31 are caller-saved.
  CPURegList list = CPURegList(CPURegister::kVRegister, size, 0, 7);
  list.Combine(CPURegList(CPURegister::kVRegister, size, 16, 31));
  return list;
}
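
// For reference (informational comment only), the AAPCS64 split encoded by
// the lists above, and a typical way they are consumed:
//
//   x0-x17          caller-saved (x18 is the reserved platform register)
//   x19-x28         callee-saved
//   d0-d7, d16-d31  caller-saved
//   d8-d15          callee-saved (low 64 bits only)
//
//   CPURegList saved = CPURegList::GetCalleeSaved(kXRegSizeInBits);
//   saved.Align();  // Keep the list an even length so sp stays 16-aligned.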

// -----------------------------------------------------------------------------
// Implementation of RelocInfo

const int RelocInfo::kApplyMask =
    RelocInfo::ModeMask(RelocInfo::CODE_TARGET) |
    RelocInfo::ModeMask(RelocInfo::RUNTIME_ENTRY) |
    RelocInfo::ModeMask(RelocInfo::INTERNAL_REFERENCE);

bool RelocInfo::IsCodedSpecially() {
  // The deserializer needs to know whether a pointer is specially coded. Being
  // specially coded on ARM64 means that it is an immediate branch.
  Instruction* instr = reinterpret_cast<Instruction*>(pc_);
  if (instr->IsLdrLiteralX()) {
    return false;
  } else {
    DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
    return true;
  }
}

bool RelocInfo::IsInConstantPool() {
  Instruction* instr = reinterpret_cast<Instruction*>(pc_);
  DCHECK_IMPLIES(instr->IsLdrLiteralW(), COMPRESS_POINTERS_BOOL);
  return instr->IsLdrLiteralX() ||
         (COMPRESS_POINTERS_BOOL && instr->IsLdrLiteralW());
}

uint32_t RelocInfo::wasm_call_tag() const {
  DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
  Instruction* instr = reinterpret_cast<Instruction*>(pc_);
  if (instr->IsLdrLiteralX()) {
    return static_cast<uint32_t>(
        Memory<Address>(Assembler::target_pointer_address_at(pc_)));
  } else {
    DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
    return static_cast<uint32_t>(instr->ImmPCOffset() / kInstrSize);
  }
}

bool AreAliased(const CPURegister& reg1, const CPURegister& reg2,
                const CPURegister& reg3, const CPURegister& reg4,
                const CPURegister& reg5, const CPURegister& reg6,
                const CPURegister& reg7, const CPURegister& reg8) {
  int number_of_valid_regs = 0;
  int number_of_valid_fpregs = 0;

  RegList unique_regs = 0;
  RegList unique_fpregs = 0;

  const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};

  for (unsigned i = 0; i < arraysize(regs); i++) {
    if (regs[i].IsRegister()) {
      number_of_valid_regs++;
      unique_regs |= regs[i].bit();
    } else if (regs[i].IsVRegister()) {
      number_of_valid_fpregs++;
      unique_fpregs |= regs[i].bit();
    } else {
      DCHECK(!regs[i].is_valid());
    }
  }

  int number_of_unique_regs =
      CountSetBits(unique_regs, sizeof(unique_regs) * kBitsPerByte);
  int number_of_unique_fpregs =
      CountSetBits(unique_fpregs, sizeof(unique_fpregs) * kBitsPerByte);

  DCHECK(number_of_valid_regs >= number_of_unique_regs);
  DCHECK(number_of_valid_fpregs >= number_of_unique_fpregs);

  return (number_of_valid_regs != number_of_unique_regs) ||
         (number_of_valid_fpregs != number_of_unique_fpregs);
}
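
// Illustration only: AreAliased() reports duplicates within each register
// bank and ignores invalid (NoReg) arguments. For example, assuming the usual
// register aliases:
//
//   AreAliased(x0, x1, x2);     // false - all distinct.
//   AreAliased(x0, w0);         // true  - same core register, different size.
//   AreAliased(x0, NoReg, x0);  // true  - duplicates are still found.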

bool AreSameSizeAndType(const CPURegister& reg1, const CPURegister& reg2,
                        const CPURegister& reg3, const CPURegister& reg4,
                        const CPURegister& reg5, const CPURegister& reg6,
                        const CPURegister& reg7, const CPURegister& reg8) {
  DCHECK(reg1.is_valid());
  bool match = true;
  match &= !reg2.is_valid() || reg2.IsSameSizeAndType(reg1);
  match &= !reg3.is_valid() || reg3.IsSameSizeAndType(reg1);
  match &= !reg4.is_valid() || reg4.IsSameSizeAndType(reg1);
  match &= !reg5.is_valid() || reg5.IsSameSizeAndType(reg1);
  match &= !reg6.is_valid() || reg6.IsSameSizeAndType(reg1);
  match &= !reg7.is_valid() || reg7.IsSameSizeAndType(reg1);
  match &= !reg8.is_valid() || reg8.IsSameSizeAndType(reg1);
  return match;
}

bool AreSameFormat(const VRegister& reg1, const VRegister& reg2,
                   const VRegister& reg3, const VRegister& reg4) {
  DCHECK(reg1.is_valid());
  return (!reg2.is_valid() || reg2.IsSameFormat(reg1)) &&
         (!reg3.is_valid() || reg3.IsSameFormat(reg1)) &&
         (!reg4.is_valid() || reg4.IsSameFormat(reg1));
}

bool AreConsecutive(const VRegister& reg1, const VRegister& reg2,
                    const VRegister& reg3, const VRegister& reg4) {
  DCHECK(reg1.is_valid());
  if (!reg2.is_valid()) {
    DCHECK(!reg3.is_valid() && !reg4.is_valid());
    return true;
  } else if (reg2.code() != ((reg1.code() + 1) % kNumberOfVRegisters)) {
    return false;
  }

  if (!reg3.is_valid()) {
    DCHECK(!reg4.is_valid());
    return true;
  } else if (reg3.code() != ((reg2.code() + 1) % kNumberOfVRegisters)) {
    return false;
  }

  if (!reg4.is_valid()) {
    return true;
  } else if (reg4.code() != ((reg3.code() + 1) % kNumberOfVRegisters)) {
    return false;
  }

  return true;
}
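
// Illustration only: register numbers are compared modulo the size of the
// register file, so a consecutive sequence may wrap around its end, e.g.:
//
//   AreConsecutive(v1.V8B(), v2.V8B());                // true
//   AreConsecutive(v31.V16B(), v0.V16B(), v1.V16B());  // true - wraps at v31.
//   AreConsecutive(v3.V4S(), v5.V4S());                // false - v4 skipped.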

bool Operand::NeedsRelocation(const Assembler* assembler) const {
  RelocInfo::Mode rmode = immediate_.rmode();

  if (RelocInfo::IsOnlyForSerializer(rmode)) {
    return assembler->options().record_reloc_info_for_serialization;
  }

  return !RelocInfo::IsNone(rmode);
}

// Assembler
Assembler::Assembler(const AssemblerOptions& options,
                     std::unique_ptr<AssemblerBuffer> buffer)
    : AssemblerBase(options, std::move(buffer)),
      unresolved_branches_(),
      constpool_(this) {
  veneer_pool_blocked_nesting_ = 0;
  Reset();

#if defined(V8_OS_WIN)
  if (options.collect_win64_unwind_info) {
    xdata_encoder_ = std::make_unique<win64_unwindinfo::XdataEncoder>(*this);
  }
#endif
}

Assembler::~Assembler() {
  DCHECK(constpool_.IsEmpty());
  DCHECK_EQ(veneer_pool_blocked_nesting_, 0);
}

void Assembler::AbortedCodeGeneration() { constpool_.Clear(); }

void Assembler::Reset() {
#ifdef DEBUG
  DCHECK((pc_ >= buffer_start_) && (pc_ < buffer_start_ + buffer_->size()));
  DCHECK_EQ(veneer_pool_blocked_nesting_, 0);
  DCHECK(unresolved_branches_.empty());
  memset(buffer_start_, 0, pc_ - buffer_start_);
#endif
  pc_ = buffer_start_;
  reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
  constpool_.Clear();
  next_veneer_pool_check_ = kMaxInt;
}

#if defined(V8_OS_WIN)
win64_unwindinfo::BuiltinUnwindInfo Assembler::GetUnwindInfo() const {
  DCHECK(options().collect_win64_unwind_info);
  DCHECK_NOT_NULL(xdata_encoder_);
  return xdata_encoder_->unwinding_info();
}
#endif

void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
  DCHECK_IMPLIES(isolate == nullptr, heap_object_requests_.empty());
  for (auto& request : heap_object_requests_) {
    Address pc = reinterpret_cast<Address>(buffer_start_) + request.offset();
    switch (request.kind()) {
      case HeapObjectRequest::kHeapNumber: {
        Handle<HeapObject> object =
            isolate->factory()->NewHeapNumber<AllocationType::kOld>(
                request.heap_number());
        EmbeddedObjectIndex index = AddEmbeddedObject(object);
        set_embedded_object_index_referenced_from(pc, index);
        break;
      }
      case HeapObjectRequest::kStringConstant: {
        const StringConstantBase* str = request.string();
        CHECK_NOT_NULL(str);
        EmbeddedObjectIndex index =
            AddEmbeddedObject(str->AllocateStringConstant(isolate));
        set_embedded_object_index_referenced_from(pc, index);
        break;
      }
    }
  }
}

void Assembler::GetCode(Isolate* isolate, CodeDesc* desc,
                        SafepointTableBuilder* safepoint_table_builder,
                        int handler_table_offset) {
  // As a crutch to avoid having to add manual Align calls wherever we use a
  // raw workflow to create Code objects (mostly in tests), add another Align
  // call here. It does no harm - the end of the Code object is aligned to the
  // (larger) kCodeAlignment anyway.
  // TODO(jgruber): Consider moving responsibility for proper alignment to
  // metadata table builders (safepoint, handler, constant pool, code
  // comments).
  DataAlign(Code::kMetadataAlignment);

  // Emit constant pool if necessary.
  ForceConstantPoolEmissionWithoutJump();
  DCHECK(constpool_.IsEmpty());

  int code_comments_size = WriteCodeComments();

  AllocateAndInstallRequestedHeapObjects(isolate);

  // Set up code descriptor.
  // TODO(jgruber): Reconsider how these offsets and sizes are maintained up to
  // this point to make CodeDesc initialization less fiddly.

  static constexpr int kConstantPoolSize = 0;
  const int instruction_size = pc_offset();
  const int code_comments_offset = instruction_size - code_comments_size;
  const int constant_pool_offset = code_comments_offset - kConstantPoolSize;
  const int handler_table_offset2 = (handler_table_offset == kNoHandlerTable)
                                        ? constant_pool_offset
                                        : handler_table_offset;
  const int safepoint_table_offset =
      (safepoint_table_builder == kNoSafepointTable)
          ? handler_table_offset2
          : safepoint_table_builder->GetCodeOffset();
  const int reloc_info_offset =
      static_cast<int>(reloc_info_writer.pos() - buffer_->start());
  CodeDesc::Initialize(desc, this, safepoint_table_offset,
                       handler_table_offset2, constant_pool_offset,
                       code_comments_offset, reloc_info_offset);
}

void Assembler::Align(int m) {
  // If not, the loop below won't terminate.
  DCHECK(IsAligned(pc_offset(), kInstrSize));
  DCHECK(m >= kInstrSize && base::bits::IsPowerOfTwo(m));
  while ((pc_offset() & (m - 1)) != 0) {
    nop();
  }
}

void Assembler::CodeTargetAlign() {
  // Preferred alignment of jump targets on some ARM chips.
  Align(8);
}

void Assembler::CheckLabelLinkChain(Label const* label) {
#ifdef DEBUG
  if (label->is_linked()) {
    static const int kMaxLinksToCheck = 64;  // Avoid O(n^2) behaviour.
    int links_checked = 0;
    int64_t linkoffset = label->pos();
    bool end_of_chain = false;
    while (!end_of_chain) {
      if (++links_checked > kMaxLinksToCheck) break;
      Instruction* link = InstructionAt(linkoffset);
      int64_t linkpcoffset = link->ImmPCOffset();
      int64_t prevlinkoffset = linkoffset + linkpcoffset;

      end_of_chain = (linkoffset == prevlinkoffset);
      linkoffset = linkoffset + linkpcoffset;
    }
  }
#endif
}

void Assembler::RemoveBranchFromLabelLinkChain(Instruction* branch,
                                               Label* label,
                                               Instruction* label_veneer) {
  DCHECK(label->is_linked());

  CheckLabelLinkChain(label);

  Instruction* link = InstructionAt(label->pos());
  Instruction* prev_link = link;
  Instruction* next_link;
  bool end_of_chain = false;

  while (link != branch && !end_of_chain) {
    next_link = link->ImmPCOffsetTarget();
    end_of_chain = (link == next_link);
    prev_link = link;
    link = next_link;
  }

  DCHECK(branch == link);
  next_link = branch->ImmPCOffsetTarget();

  if (branch == prev_link) {
    // The branch is the first instruction in the chain.
    if (branch == next_link) {
      // It is also the last instruction in the chain, so it is the only branch
      // currently referring to this label.
      label->Unuse();
    } else {
      label->link_to(static_cast<int>(reinterpret_cast<byte*>(next_link) -
                                      buffer_start_));
    }

  } else if (branch == next_link) {
    // The branch is the last (but not also the first) instruction in the chain.
    prev_link->SetImmPCOffsetTarget(options(), prev_link);

  } else {
    // The branch is in the middle of the chain.
    if (prev_link->IsTargetInImmPCOffsetRange(next_link)) {
      prev_link->SetImmPCOffsetTarget(options(), next_link);
    } else if (label_veneer != nullptr) {
      // Use the veneer for all previous links in the chain.
      prev_link->SetImmPCOffsetTarget(options(), prev_link);

      end_of_chain = false;
      link = next_link;
      while (!end_of_chain) {
        next_link = link->ImmPCOffsetTarget();
        end_of_chain = (link == next_link);
        link->SetImmPCOffsetTarget(options(), label_veneer);
        link = next_link;
      }
    } else {
      // The assert below will fire.
      // Some other work could be attempted to fix up the chain, but it would
      // be rather complicated. If we crash here, we may want to consider using
      // another mechanism than a chain of branches.
      //
      // Note that this situation currently should not happen, as we always
      // call this function with a veneer to the target label.
      // However this could happen with a MacroAssembler in the following state:
      //    [previous code]
      //    B(label);
      //    [20KB code]
      //    Tbz(label);   // First tbz. Pointing to unconditional branch.
      //    [20KB code]
      //    Tbz(label);   // Second tbz. Pointing to the first tbz.
      //    [more code]
      // and this function is called to remove the first tbz from the label
      // link chain. Since tbz has a range of +-32KB, the second tbz cannot
      // point to the unconditional branch.
      CHECK(prev_link->IsTargetInImmPCOffsetRange(next_link));
      UNREACHABLE();
    }
  }

  CheckLabelLinkChain(label);
}

void Assembler::bind(Label* label) {
  // Bind label to the address at pc_. All instructions (most likely branches)
  // that are linked to this label will be updated to point to the newly-bound
  // label.

  DCHECK(!label->is_near_linked());
  DCHECK(!label->is_bound());

  DeleteUnresolvedBranchInfoForLabel(label);

  // If the label is linked, the link chain looks something like this:
  //
  // |--I----I-------I-------L
  // |---------------------->| pc_offset
  // |-------------->|         linkoffset = label->pos()
  //         |<------|         link->ImmPCOffset()
  // |------>|                 prevlinkoffset = linkoffset + link->ImmPCOffset()
  //
  // On each iteration, the last link is updated and then removed from the
  // chain until only one remains. At that point, the label is bound.
  //
  // If the label is not linked, no preparation is required before binding.
  while (label->is_linked()) {
    int linkoffset = label->pos();
    Instruction* link = InstructionAt(linkoffset);
    int prevlinkoffset = linkoffset + static_cast<int>(link->ImmPCOffset());

    CheckLabelLinkChain(label);

    DCHECK_GE(linkoffset, 0);
    DCHECK(linkoffset < pc_offset());
    DCHECK((linkoffset > prevlinkoffset) ||
           (linkoffset - prevlinkoffset == kStartOfLabelLinkChain));
    DCHECK_GE(prevlinkoffset, 0);

    // Update the link to point to the label.
    if (link->IsUnresolvedInternalReference()) {
      // Internal references do not get patched to an instruction but directly
      // to an address.
      internal_reference_positions_.push_back(linkoffset);
      memcpy(link, &pc_, kSystemPointerSize);
    } else {
      link->SetImmPCOffsetTarget(options(),
                                 reinterpret_cast<Instruction*>(pc_));
    }

    // Link the label to the previous link in the chain.
    if (linkoffset - prevlinkoffset == kStartOfLabelLinkChain) {
      // We hit kStartOfLabelLinkChain, so the chain is fully processed.
      label->Unuse();
    } else {
      // Update the label for the next iteration.
      label->link_to(prevlinkoffset);
    }
  }
  label->bind_to(pc_offset());

  DCHECK(label->is_bound());
  DCHECK(!label->is_linked());
}
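
// A rough usage sketch (comment only, 'assm' is an assumed Assembler
// instance): forward branches are linked into the label's chain and patched
// when bind() is reached:
//
//   Label done;
//   assm.cbz(x0, &done);           // Forward branch: linked until bound.
//   assm.add(x1, x1, Operand(1));
//   assm.bind(&done);              // Patches every branch linked to 'done'.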

int Assembler::LinkAndGetByteOffsetTo(Label* label) {
  DCHECK_EQ(sizeof(*pc_), 1);
  CheckLabelLinkChain(label);

  int offset;
  if (label->is_bound()) {
    // The label is bound, so it does not need to be updated. Referring
    // instructions must link directly to the label as they will not be
    // updated.
    //
    // In this case, label->pos() returns the offset of the label from the
    // start of the buffer.
    //
    // Note that offset can be zero for self-referential instructions. (This
    // could be useful for ADR, for example.)
    offset = label->pos() - pc_offset();
    DCHECK_LE(offset, 0);
  } else {
    if (label->is_linked()) {
      // The label is linked, so the referring instruction should be added onto
      // the end of the label's link chain.
      //
      // In this case, label->pos() returns the offset of the last linked
      // instruction from the start of the buffer.
      offset = label->pos() - pc_offset();
      DCHECK_NE(offset, kStartOfLabelLinkChain);
      // Note that the offset here needs to be PC-relative only so that the
      // first instruction in a buffer can link to an unbound label. Otherwise,
      // the offset would be 0 for this case, and 0 is reserved for
      // kStartOfLabelLinkChain.
    } else {
      // The label is unused, so it now becomes linked and the referring
      // instruction is at the start of the new link chain.
      offset = kStartOfLabelLinkChain;
    }
    // The instruction at pc is now the last link in the label's chain.
    label->link_to(pc_offset());
  }

  return offset;
}

void Assembler::DeleteUnresolvedBranchInfoForLabelTraverse(Label* label) {
  DCHECK(label->is_linked());
  CheckLabelLinkChain(label);

  int link_offset = label->pos();
  int link_pcoffset;
  bool end_of_chain = false;

  while (!end_of_chain) {
    Instruction* link = InstructionAt(link_offset);
    link_pcoffset = static_cast<int>(link->ImmPCOffset());

    // ADR instructions are not handled by veneers.
    if (link->IsImmBranch()) {
      int max_reachable_pc =
          static_cast<int>(InstructionOffset(link) +
                           Instruction::ImmBranchRange(link->BranchType()));
      using unresolved_info_it = std::multimap<int, FarBranchInfo>::iterator;
      std::pair<unresolved_info_it, unresolved_info_it> range;
      range = unresolved_branches_.equal_range(max_reachable_pc);
      unresolved_info_it it;
      for (it = range.first; it != range.second; ++it) {
        if (it->second.pc_offset_ == link_offset) {
          unresolved_branches_.erase(it);
          break;
        }
      }
    }

    end_of_chain = (link_pcoffset == 0);
    link_offset = link_offset + link_pcoffset;
  }
}

void Assembler::DeleteUnresolvedBranchInfoForLabel(Label* label) {
  if (unresolved_branches_.empty()) {
    DCHECK_EQ(next_veneer_pool_check_, kMaxInt);
    return;
  }

  if (label->is_linked()) {
    // Branches to this label will be resolved when the label is bound,
    // normally just after all the associated info has been deleted.
    DeleteUnresolvedBranchInfoForLabelTraverse(label);
  }
  if (unresolved_branches_.empty()) {
    next_veneer_pool_check_ = kMaxInt;
  } else {
    next_veneer_pool_check_ =
        unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
  }
}

bool Assembler::IsConstantPoolAt(Instruction* instr) {
  // The constant pool marker is made of two instructions. These instructions
  // will never be emitted by the JIT, so checking for the first one is enough:
  // 0: ldr xzr, #<size of pool>
  bool result = instr->IsLdrLiteralX() && (instr->Rt() == kZeroRegCode);

  // It is still worth asserting the marker is complete.
  // 4: blr xzr
  DCHECK(!result || (instr->following()->IsBranchAndLinkToRegister() &&
                     instr->following()->Rn() == kZeroRegCode));

  return result;
}

int Assembler::ConstantPoolSizeAt(Instruction* instr) {
#ifdef USE_SIMULATOR
  // Assembler::debug() embeds constants directly into the instruction stream.
  // Although this is not a genuine constant pool, treat it like one to avoid
  // disassembling the constants.
  if ((instr->Mask(ExceptionMask) == HLT) &&
      (instr->ImmException() == kImmExceptionIsDebug)) {
    const char* message = reinterpret_cast<const char*>(
        instr->InstructionAtOffset(kDebugMessageOffset));
    int size = static_cast<int>(kDebugMessageOffset + strlen(message) + 1);
    return RoundUp(size, kInstrSize) / kInstrSize;
  }
  // Same for printf support, see MacroAssembler::CallPrintf().
  if ((instr->Mask(ExceptionMask) == HLT) &&
      (instr->ImmException() == kImmExceptionIsPrintf)) {
    return kPrintfLength / kInstrSize;
  }
#endif
  if (IsConstantPoolAt(instr)) {
    return instr->ImmLLiteral();
  } else {
    return -1;
  }
}

void Assembler::EmitPoolGuard() {
  // We must generate only one instruction as this is used in scopes that
  // control the size of the code generated.
  Emit(BLR | Rn(xzr));
}

void Assembler::StartBlockVeneerPool() { ++veneer_pool_blocked_nesting_; }

void Assembler::EndBlockVeneerPool() {
  if (--veneer_pool_blocked_nesting_ == 0) {
    // Check the veneer pool hasn't been blocked for too long.
    DCHECK(unresolved_branches_.empty() ||
           (pc_offset() < unresolved_branches_first_limit()));
  }
}

void Assembler::br(const Register& xn) {
  DCHECK(xn.Is64Bits());
  Emit(BR | Rn(xn));
}

void Assembler::blr(const Register& xn) {
  DCHECK(xn.Is64Bits());
  // The pattern 'blr xzr' is used as a guard to detect when execution falls
  // through the constant pool. It should not be emitted.
  DCHECK_NE(xn, xzr);
  Emit(BLR | Rn(xn));
}

void Assembler::ret(const Register& xn) {
  DCHECK(xn.Is64Bits());
  Emit(RET | Rn(xn));
}

void Assembler::b(int imm26) { Emit(B | ImmUncondBranch(imm26)); }

void Assembler::b(Label* label) { b(LinkAndGetInstructionOffsetTo(label)); }

void Assembler::b(int imm19, Condition cond) {
  Emit(B_cond | ImmCondBranch(imm19) | cond);
}

void Assembler::b(Label* label, Condition cond) {
  b(LinkAndGetInstructionOffsetTo(label), cond);
}

void Assembler::bl(int imm26) { Emit(BL | ImmUncondBranch(imm26)); }

void Assembler::bl(Label* label) { bl(LinkAndGetInstructionOffsetTo(label)); }

void Assembler::cbz(const Register& rt, int imm19) {
  Emit(SF(rt) | CBZ | ImmCmpBranch(imm19) | Rt(rt));
}

void Assembler::cbz(const Register& rt, Label* label) {
  cbz(rt, LinkAndGetInstructionOffsetTo(label));
}

void Assembler::cbnz(const Register& rt, int imm19) {
  Emit(SF(rt) | CBNZ | ImmCmpBranch(imm19) | Rt(rt));
}

void Assembler::cbnz(const Register& rt, Label* label) {
  cbnz(rt, LinkAndGetInstructionOffsetTo(label));
}

void Assembler::tbz(const Register& rt, unsigned bit_pos, int imm14) {
  DCHECK(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSizeInBits)));
  Emit(TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
}

void Assembler::tbz(const Register& rt, unsigned bit_pos, Label* label) {
  tbz(rt, bit_pos, LinkAndGetInstructionOffsetTo(label));
}

void Assembler::tbnz(const Register& rt, unsigned bit_pos, int imm14) {
  DCHECK(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSizeInBits)));
  Emit(TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
}

void Assembler::tbnz(const Register& rt, unsigned bit_pos, Label* label) {
  tbnz(rt, bit_pos, LinkAndGetInstructionOffsetTo(label));
}

void Assembler::adr(const Register& rd, int imm21) {
  DCHECK(rd.Is64Bits());
  Emit(ADR | ImmPCRelAddress(imm21) | Rd(rd));
}

void Assembler::adr(const Register& rd, Label* label) {
  adr(rd, LinkAndGetByteOffsetTo(label));
}

void Assembler::nop(NopMarkerTypes n) {
  DCHECK((FIRST_NOP_MARKER <= n) && (n <= LAST_NOP_MARKER));
  mov(Register::XRegFromCode(n), Register::XRegFromCode(n));
}

void Assembler::add(const Register& rd, const Register& rn,
                    const Operand& operand) {
  AddSub(rd, rn, operand, LeaveFlags, ADD);
}

void Assembler::adds(const Register& rd, const Register& rn,
                     const Operand& operand) {
  AddSub(rd, rn, operand, SetFlags, ADD);
}

void Assembler::cmn(const Register& rn, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rn);
  adds(zr, rn, operand);
}

void Assembler::sub(const Register& rd, const Register& rn,
                    const Operand& operand) {
  AddSub(rd, rn, operand, LeaveFlags, SUB);
}

void Assembler::subs(const Register& rd, const Register& rn,
                     const Operand& operand) {
  AddSub(rd, rn, operand, SetFlags, SUB);
}

void Assembler::cmp(const Register& rn, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rn);
  subs(zr, rn, operand);
}

void Assembler::neg(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  sub(rd, zr, operand);
}

void Assembler::negs(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  subs(rd, zr, operand);
}

void Assembler::adc(const Register& rd, const Register& rn,
                    const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, LeaveFlags, ADC);
}

void Assembler::adcs(const Register& rd, const Register& rn,
                     const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, SetFlags, ADC);
}

void Assembler::sbc(const Register& rd, const Register& rn,
                    const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, LeaveFlags, SBC);
}

void Assembler::sbcs(const Register& rd, const Register& rn,
                     const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, SetFlags, SBC);
}

void Assembler::ngc(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  sbc(rd, zr, operand);
}

void Assembler::ngcs(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  sbcs(rd, zr, operand);
}

// Logical instructions.
void Assembler::and_(const Register& rd, const Register& rn,
                     const Operand& operand) {
  Logical(rd, rn, operand, AND);
}

void Assembler::ands(const Register& rd, const Register& rn,
                     const Operand& operand) {
  Logical(rd, rn, operand, ANDS);
}

void Assembler::tst(const Register& rn, const Operand& operand) {
  ands(AppropriateZeroRegFor(rn), rn, operand);
}

void Assembler::bic(const Register& rd, const Register& rn,
                    const Operand& operand) {
  Logical(rd, rn, operand, BIC);
}

void Assembler::bics(const Register& rd, const Register& rn,
                     const Operand& operand) {
  Logical(rd, rn, operand, BICS);
}

void Assembler::orr(const Register& rd, const Register& rn,
                    const Operand& operand) {
  Logical(rd, rn, operand, ORR);
}

void Assembler::orn(const Register& rd, const Register& rn,
                    const Operand& operand) {
  Logical(rd, rn, operand, ORN);
}

void Assembler::eor(const Register& rd, const Register& rn,
                    const Operand& operand) {
  Logical(rd, rn, operand, EOR);
}

void Assembler::eon(const Register& rd, const Register& rn,
                    const Operand& operand) {
  Logical(rd, rn, operand, EON);
}

void Assembler::lslv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | LSLV | Rm(rm) | Rn(rn) | Rd(rd));
}

void Assembler::lsrv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | LSRV | Rm(rm) | Rn(rn) | Rd(rd));
}

void Assembler::asrv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | ASRV | Rm(rm) | Rn(rn) | Rd(rd));
}

void Assembler::rorv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | RORV | Rm(rm) | Rn(rn) | Rd(rd));
}

// Bitfield operations.
void Assembler::bfm(const Register& rd, const Register& rn, int immr,
                    int imms) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
  Emit(SF(rd) | BFM | N | ImmR(immr, rd.SizeInBits()) |
       ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
}

void Assembler::sbfm(const Register& rd, const Register& rn, int immr,
                     int imms) {
  DCHECK(rd.Is64Bits() || rn.Is32Bits());
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
  Emit(SF(rd) | SBFM | N | ImmR(immr, rd.SizeInBits()) |
       ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
}

void Assembler::ubfm(const Register& rd, const Register& rn, int immr,
                     int imms) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
  Emit(SF(rd) | UBFM | N | ImmR(immr, rd.SizeInBits()) |
       ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
}

void Assembler::extr(const Register& rd, const Register& rn,
                     const Register& rm, int lsb) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
  Emit(SF(rd) | EXTR | N | Rm(rm) | ImmS(lsb, rn.SizeInBits()) | Rn(rn) |
       Rd(rd));
}
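
// Illustration only (comment, not emitted code): a rotate-right by an
// immediate has no dedicated encoding and is expressed as extr with the same
// register for both sources, e.g. "ror x0, x1, #8" is:
//
//   extr(x0, x1, x1, 8);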

void Assembler::csel(const Register& rd, const Register& rn,
                     const Register& rm, Condition cond) {
  ConditionalSelect(rd, rn, rm, cond, CSEL);
}

void Assembler::csinc(const Register& rd, const Register& rn,
                      const Register& rm, Condition cond) {
  ConditionalSelect(rd, rn, rm, cond, CSINC);
}

void Assembler::csinv(const Register& rd, const Register& rn,
                      const Register& rm, Condition cond) {
  ConditionalSelect(rd, rn, rm, cond, CSINV);
}

void Assembler::csneg(const Register& rd, const Register& rn,
                      const Register& rm, Condition cond) {
  ConditionalSelect(rd, rn, rm, cond, CSNEG);
}

void Assembler::cset(const Register& rd, Condition cond) {
  DCHECK((cond != al) && (cond != nv));
  Register zr = AppropriateZeroRegFor(rd);
  csinc(rd, zr, zr, NegateCondition(cond));
}

void Assembler::csetm(const Register& rd, Condition cond) {
  DCHECK((cond != al) && (cond != nv));
  Register zr = AppropriateZeroRegFor(rd);
  csinv(rd, zr, zr, NegateCondition(cond));
}

void Assembler::cinc(const Register& rd, const Register& rn, Condition cond) {
  DCHECK((cond != al) && (cond != nv));
  csinc(rd, rn, rn, NegateCondition(cond));
}

void Assembler::cinv(const Register& rd, const Register& rn, Condition cond) {
  DCHECK((cond != al) && (cond != nv));
  csinv(rd, rn, rn, NegateCondition(cond));
}

void Assembler::cneg(const Register& rd, const Register& rn, Condition cond) {
  DCHECK((cond != al) && (cond != nv));
  csneg(rd, rn, rn, NegateCondition(cond));
}

void Assembler::ConditionalSelect(const Register& rd, const Register& rn,
                                  const Register& rm, Condition cond,
                                  ConditionalSelectOp op) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | op | Rm(rm) | Cond(cond) | Rn(rn) | Rd(rd));
}

void Assembler::ccmn(const Register& rn, const Operand& operand,
                     StatusFlags nzcv, Condition cond) {
  ConditionalCompare(rn, operand, nzcv, cond, CCMN);
}

void Assembler::ccmp(const Register& rn, const Operand& operand,
                     StatusFlags nzcv, Condition cond) {
  ConditionalCompare(rn, operand, nzcv, cond, CCMP);
}

void Assembler::DataProcessing3Source(const Register& rd, const Register& rn,
                                      const Register& rm, const Register& ra,
                                      DataProcessing3SourceOp op) {
  Emit(SF(rd) | op | Rm(rm) | Ra(ra) | Rn(rn) | Rd(rd));
}

void Assembler::mul(const Register& rd, const Register& rn,
                    const Register& rm) {
  DCHECK(AreSameSizeAndType(rd, rn, rm));
  Register zr = AppropriateZeroRegFor(rn);
  DataProcessing3Source(rd, rn, rm, zr, MADD);
}

void Assembler::madd(const Register& rd, const Register& rn,
                     const Register& rm, const Register& ra) {
  DCHECK(AreSameSizeAndType(rd, rn, rm, ra));
  DataProcessing3Source(rd, rn, rm, ra, MADD);
}

void Assembler::mneg(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(AreSameSizeAndType(rd, rn, rm));
  Register zr = AppropriateZeroRegFor(rn);
  DataProcessing3Source(rd, rn, rm, zr, MSUB);
}

void Assembler::msub(const Register& rd, const Register& rn,
                     const Register& rm, const Register& ra) {
  DCHECK(AreSameSizeAndType(rd, rn, rm, ra));
  DataProcessing3Source(rd, rn, rm, ra, MSUB);
}

void Assembler::smaddl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
  DataProcessing3Source(rd, rn, rm, ra, SMADDL_x);
}

void Assembler::smsubl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
  DataProcessing3Source(rd, rn, rm, ra, SMSUBL_x);
}

void Assembler::umaddl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
  DataProcessing3Source(rd, rn, rm, ra, UMADDL_x);
}

void Assembler::umsubl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
  DataProcessing3Source(rd, rn, rm, ra, UMSUBL_x);
}

void Assembler::smull(const Register& rd, const Register& rn,
                      const Register& rm) {
  DCHECK(rd.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
  DataProcessing3Source(rd, rn, rm, xzr, SMADDL_x);
}

void Assembler::smulh(const Register& rd, const Register& rn,
                      const Register& rm) {
  DCHECK(AreSameSizeAndType(rd, rn, rm));
  DataProcessing3Source(rd, rn, rm, xzr, SMULH_x);
}

void Assembler::sdiv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | SDIV | Rm(rm) | Rn(rn) | Rd(rd));
}

void Assembler::udiv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | UDIV | Rm(rm) | Rn(rn) | Rd(rd));
}

void Assembler::rbit(const Register& rd, const Register& rn) {
  DataProcessing1Source(rd, rn, RBIT);
}

void Assembler::rev16(const Register& rd, const Register& rn) {
  DataProcessing1Source(rd, rn, REV16);
}

void Assembler::rev32(const Register& rd, const Register& rn) {
  DCHECK(rd.Is64Bits());
  DataProcessing1Source(rd, rn, REV);
}

void Assembler::rev(const Register& rd, const Register& rn) {
  DataProcessing1Source(rd, rn, rd.Is64Bits() ? REV_x : REV_w);
}

void Assembler::clz(const Register& rd, const Register& rn) {
  DataProcessing1Source(rd, rn, CLZ);
}

void Assembler::cls(const Register& rd, const Register& rn) {
  DataProcessing1Source(rd, rn, CLS);
}

void Assembler::pacib1716() { Emit(PACIB1716); }
void Assembler::autib1716() { Emit(AUTIB1716); }
void Assembler::pacibsp() { Emit(PACIBSP); }
void Assembler::autibsp() { Emit(AUTIBSP); }

void Assembler::bti(BranchTargetIdentifier id) {
  SystemHint op;
  switch (id) {
    case BranchTargetIdentifier::kBti:
      op = BTI;
      break;
    case BranchTargetIdentifier::kBtiCall:
      op = BTI_c;
      break;
    case BranchTargetIdentifier::kBtiJump:
      op = BTI_j;
      break;
    case BranchTargetIdentifier::kBtiJumpCall:
      op = BTI_jc;
      break;
    case BranchTargetIdentifier::kNone:
    case BranchTargetIdentifier::kPacibsp:
      // We always want to generate a BTI instruction here, so disallow
      // skipping its generation or generating a PACIBSP instead.
      UNREACHABLE();
  }
  hint(op);
}

void Assembler::ldp(const CPURegister& rt, const CPURegister& rt2,
                    const MemOperand& src) {
  LoadStorePair(rt, rt2, src, LoadPairOpFor(rt, rt2));
}

void Assembler::stp(const CPURegister& rt, const CPURegister& rt2,
                    const MemOperand& dst) {
  LoadStorePair(rt, rt2, dst, StorePairOpFor(rt, rt2));

#if defined(V8_OS_WIN)
  if (xdata_encoder_ && rt == x29 && rt2 == lr && dst.base().IsSP()) {
    xdata_encoder_->onSaveFpLr();
  }
#endif
}

void Assembler::ldpsw(const Register& rt, const Register& rt2,
                      const MemOperand& src) {
  DCHECK(rt.Is64Bits());
  LoadStorePair(rt, rt2, src, LDPSW_x);
}

void Assembler::LoadStorePair(const CPURegister& rt, const CPURegister& rt2,
                              const MemOperand& addr, LoadStorePairOp op) {
  // 'rt' and 'rt2' can only be aliased for stores.
  DCHECK(((op & LoadStorePairLBit) == 0) || rt != rt2);
  DCHECK(AreSameSizeAndType(rt, rt2));
  DCHECK(IsImmLSPair(addr.offset(), CalcLSPairDataSize(op)));
  int offset = static_cast<int>(addr.offset());

  Instr memop = op | Rt(rt) | Rt2(rt2) | RnSP(addr.base()) |
                ImmLSPair(offset, CalcLSPairDataSize(op));

  Instr addrmodeop;
  if (addr.IsImmediateOffset()) {
    addrmodeop = LoadStorePairOffsetFixed;
  } else {
    // Pre-index and post-index modes.
    DCHECK_NE(rt, addr.base());
    DCHECK_NE(rt2, addr.base());
    DCHECK_NE(addr.offset(), 0);
    if (addr.IsPreIndex()) {
      addrmodeop = LoadStorePairPreIndexFixed;
    } else {
      DCHECK(addr.IsPostIndex());
      addrmodeop = LoadStorePairPostIndexFixed;
    }
  }
  Emit(addrmodeop | memop);
}
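
// A rough usage sketch (comment only, 'assm' is an assumed Assembler
// instance): a typical frame prologue stores fp and lr as a pair with
// pre-indexing, which is also the pattern the Win64 unwind encoder above
// watches for:
//
//   assm.stp(x29, x30, MemOperand(sp, -16, PreIndex));  // push fp, lr
//   ...
//   assm.ldp(x29, x30, MemOperand(sp, 16, PostIndex));  // pop fp, lr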

// Memory instructions.
void Assembler::ldrb(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, LDRB_w);
}

void Assembler::strb(const Register& rt, const MemOperand& dst) {
  LoadStore(rt, dst, STRB_w);
}

void Assembler::ldrsb(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, rt.Is64Bits() ? LDRSB_x : LDRSB_w);
}

void Assembler::ldrh(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, LDRH_w);
}

void Assembler::strh(const Register& rt, const MemOperand& dst) {
  LoadStore(rt, dst, STRH_w);
}

void Assembler::ldrsh(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, rt.Is64Bits() ? LDRSH_x : LDRSH_w);
}

void Assembler::ldr(const CPURegister& rt, const MemOperand& src) {
  LoadStore(rt, src, LoadOpFor(rt));
}

void Assembler::str(const CPURegister& rt, const MemOperand& src) {
  LoadStore(rt, src, StoreOpFor(rt));
}

void Assembler::ldrsw(const Register& rt, const MemOperand& src) {
  DCHECK(rt.Is64Bits());
  LoadStore(rt, src, LDRSW_x);
}

void Assembler::ldr_pcrel(const CPURegister& rt, int imm19) {
  // The pattern 'ldr xzr, #offset' is used to indicate the beginning of a
  // constant pool. It should not be emitted.
  DCHECK(!rt.IsZero());
  Emit(LoadLiteralOpFor(rt) | ImmLLiteral(imm19) | Rt(rt));
}

Operand Operand::EmbeddedNumber(double number) {
  int32_t smi;
  if (DoubleToSmiInteger(number, &smi)) {
    return Operand(Immediate(Smi::FromInt(smi)));
  }
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
  result.heap_object_request_.emplace(number);
  DCHECK(result.IsHeapObjectRequest());
  return result;
}

Operand Operand::EmbeddedStringConstant(const StringConstantBase* str) {
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
  result.heap_object_request_.emplace(str);
  DCHECK(result.IsHeapObjectRequest());
  return result;
}

void Assembler::ldr(const CPURegister& rt, const Operand& operand) {
  if (operand.IsHeapObjectRequest()) {
    BlockPoolsScope no_pool_before_ldr_of_heap_object_request(this);
    RequestHeapObject(operand.heap_object_request());
    ldr(rt, operand.immediate_for_heap_object_request());
  } else {
    ldr(rt, operand.immediate());
  }
}

void Assembler::ldr(const CPURegister& rt, const Immediate& imm) {
  BlockPoolsScope no_pool_before_ldr_pcrel_instr(this);
  RecordRelocInfo(imm.rmode(), imm.value());
  // The load will be patched when the constant pool is emitted; the patching
  // code expects a load literal with offset 0.
  ldr_pcrel(rt, 0);
}
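
// Illustration only: an embedded constant is emitted as a pc-relative literal
// load whose offset is filled in when the constant pool is flushed, so a
// typed constant load can look like ('assm' is an assumed Assembler):
//
//   assm.ldr(x0, Operand::EmbeddedNumber(3.14));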

void Assembler::ldar(const Register& rt, const Register& rn) {
  DCHECK(rn.Is64Bits());
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? LDAR_w : LDAR_x;
  Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::ldaxr(const Register& rt, const Register& rn) {
  DCHECK(rn.Is64Bits());
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? LDAXR_w : LDAXR_x;
  Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::stlr(const Register& rt, const Register& rn) {
  DCHECK(rn.Is64Bits());
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? STLR_w : STLR_x;
  Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::stlxr(const Register& rs, const Register& rt,
                      const Register& rn) {
  DCHECK(rn.Is64Bits());
  DCHECK(rs != rt && rs != rn);
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? STLXR_w : STLXR_x;
  Emit(op | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
}
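
// A rough usage sketch (comment only): ldaxr/stlxr form a load-exclusive /
// store-conditional pair, typically retried in a loop until the exclusive
// store succeeds ('assm', 'loop' and the register assignment are assumptions
// of this example):
//
//   assm.bind(&loop);
//   assm.ldaxr(w0, x1);             // Load-acquire exclusive from [x1].
//   assm.add(w0, w0, Operand(1));
//   assm.stlxr(w2, w0, x1);         // w2 = 0 on success, 1 on failure.
//   assm.cbnz(w2, &loop);           // Retry if the exclusive store failed.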
1378
ldarb(const Register & rt,const Register & rn)1379 void Assembler::ldarb(const Register& rt, const Register& rn) {
1380 DCHECK(rt.Is32Bits());
1381 DCHECK(rn.Is64Bits());
1382 Emit(LDAR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1383 }
1384
ldaxrb(const Register & rt,const Register & rn)1385 void Assembler::ldaxrb(const Register& rt, const Register& rn) {
1386 DCHECK(rt.Is32Bits());
1387 DCHECK(rn.Is64Bits());
1388 Emit(LDAXR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1389 }
1390
stlrb(const Register & rt,const Register & rn)1391 void Assembler::stlrb(const Register& rt, const Register& rn) {
1392 DCHECK(rt.Is32Bits());
1393 DCHECK(rn.Is64Bits());
1394 Emit(STLR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1395 }
1396
stlxrb(const Register & rs,const Register & rt,const Register & rn)1397 void Assembler::stlxrb(const Register& rs, const Register& rt,
1398 const Register& rn) {
1399 DCHECK(rs.Is32Bits());
1400 DCHECK(rt.Is32Bits());
1401 DCHECK(rn.Is64Bits());
1402 DCHECK(rs != rt && rs != rn);
1403 Emit(STLXR_b | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
1404 }
1405
ldarh(const Register & rt,const Register & rn)1406 void Assembler::ldarh(const Register& rt, const Register& rn) {
1407 DCHECK(rt.Is32Bits());
1408 DCHECK(rn.Is64Bits());
1409 Emit(LDAR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1410 }
1411
ldaxrh(const Register & rt,const Register & rn)1412 void Assembler::ldaxrh(const Register& rt, const Register& rn) {
1413 DCHECK(rt.Is32Bits());
1414 DCHECK(rn.Is64Bits());
1415 Emit(LDAXR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1416 }
1417
stlrh(const Register & rt,const Register & rn)1418 void Assembler::stlrh(const Register& rt, const Register& rn) {
1419 DCHECK(rt.Is32Bits());
1420 DCHECK(rn.Is64Bits());
1421 Emit(STLR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1422 }
1423
stlxrh(const Register & rs,const Register & rt,const Register & rn)1424 void Assembler::stlxrh(const Register& rs, const Register& rt,
1425 const Register& rn) {
1426 DCHECK(rs.Is32Bits());
1427 DCHECK(rt.Is32Bits());
1428 DCHECK(rn.Is64Bits());
1429 DCHECK(rs != rt && rs != rn);
1430 Emit(STLXR_h | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
1431 }
1432
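// NEON "three different" instructions operate on lanes of different widths:
// the L forms widen (destination lanes are twice the source width), the W
// forms take one wide and one narrow source, and the HN forms narrow the
// result to half-width lanes.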
void Assembler::NEON3DifferentL(const VRegister& vd, const VRegister& vn,
                                const VRegister& vm, NEON3DifferentOp vop) {
  DCHECK(AreSameFormat(vn, vm));
  DCHECK((vn.Is1H() && vd.Is1S()) || (vn.Is1S() && vd.Is1D()) ||
         (vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
         (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
         (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
  Instr format, op = vop;
  if (vd.IsScalar()) {
    op |= NEON_Q | NEONScalar;
    format = SFormat(vn);
  } else {
    format = VFormat(vn);
  }
  Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
}

void Assembler::NEON3DifferentW(const VRegister& vd, const VRegister& vn,
                                const VRegister& vm, NEON3DifferentOp vop) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK((vm.Is8B() && vd.Is8H()) || (vm.Is4H() && vd.Is4S()) ||
         (vm.Is2S() && vd.Is2D()) || (vm.Is16B() && vd.Is8H()) ||
         (vm.Is8H() && vd.Is4S()) || (vm.Is4S() && vd.Is2D()));
  Emit(VFormat(vm) | vop | Rm(vm) | Rn(vn) | Rd(vd));
}

void Assembler::NEON3DifferentHN(const VRegister& vd, const VRegister& vn,
                                 const VRegister& vm, NEON3DifferentOp vop) {
  DCHECK(AreSameFormat(vm, vn));
  DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
         (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
         (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
  Emit(VFormat(vd) | vop | Rm(vm) | Rn(vn) | Rd(vd));
}
1467
1468 #define NEON_3DIFF_LONG_LIST(V) \
1469 V(pmull, NEON_PMULL, vn.IsVector() && vn.Is8B()) \
1470 V(pmull2, NEON_PMULL2, vn.IsVector() && vn.Is16B()) \
1471 V(saddl, NEON_SADDL, vn.IsVector() && vn.IsD()) \
1472 V(saddl2, NEON_SADDL2, vn.IsVector() && vn.IsQ()) \
1473 V(sabal, NEON_SABAL, vn.IsVector() && vn.IsD()) \
1474 V(sabal2, NEON_SABAL2, vn.IsVector() && vn.IsQ()) \
1475 V(uabal, NEON_UABAL, vn.IsVector() && vn.IsD()) \
1476 V(uabal2, NEON_UABAL2, vn.IsVector() && vn.IsQ()) \
1477 V(sabdl, NEON_SABDL, vn.IsVector() && vn.IsD()) \
1478 V(sabdl2, NEON_SABDL2, vn.IsVector() && vn.IsQ()) \
1479 V(uabdl, NEON_UABDL, vn.IsVector() && vn.IsD()) \
1480 V(uabdl2, NEON_UABDL2, vn.IsVector() && vn.IsQ()) \
1481 V(smlal, NEON_SMLAL, vn.IsVector() && vn.IsD()) \
1482 V(smlal2, NEON_SMLAL2, vn.IsVector() && vn.IsQ()) \
1483 V(umlal, NEON_UMLAL, vn.IsVector() && vn.IsD()) \
1484 V(umlal2, NEON_UMLAL2, vn.IsVector() && vn.IsQ()) \
1485 V(smlsl, NEON_SMLSL, vn.IsVector() && vn.IsD()) \
1486 V(smlsl2, NEON_SMLSL2, vn.IsVector() && vn.IsQ()) \
1487 V(umlsl, NEON_UMLSL, vn.IsVector() && vn.IsD()) \
1488 V(umlsl2, NEON_UMLSL2, vn.IsVector() && vn.IsQ()) \
1489 V(smull, NEON_SMULL, vn.IsVector() && vn.IsD()) \
1490 V(smull2, NEON_SMULL2, vn.IsVector() && vn.IsQ()) \
1491 V(umull, NEON_UMULL, vn.IsVector() && vn.IsD()) \
1492 V(umull2, NEON_UMULL2, vn.IsVector() && vn.IsQ()) \
1493 V(ssubl, NEON_SSUBL, vn.IsVector() && vn.IsD()) \
1494 V(ssubl2, NEON_SSUBL2, vn.IsVector() && vn.IsQ()) \
1495 V(uaddl, NEON_UADDL, vn.IsVector() && vn.IsD()) \
1496 V(uaddl2, NEON_UADDL2, vn.IsVector() && vn.IsQ()) \
1497 V(usubl, NEON_USUBL, vn.IsVector() && vn.IsD()) \
1498 V(usubl2, NEON_USUBL2, vn.IsVector() && vn.IsQ()) \
1499 V(sqdmlal, NEON_SQDMLAL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
1500 V(sqdmlal2, NEON_SQDMLAL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
1501 V(sqdmlsl, NEON_SQDMLSL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
1502 V(sqdmlsl2, NEON_SQDMLSL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
1503 V(sqdmull, NEON_SQDMULL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
1504 V(sqdmull2, NEON_SQDMULL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S())
1505
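// Define one Assembler entry point for each (mnemonic, opcode, precondition)
// triple in the list above; the precondition is checked with a DCHECK before
// delegating to NEON3DifferentL.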
#define DEFINE_ASM_FUNC(FN, OP, AS)                            \
  void Assembler::FN(const VRegister& vd, const VRegister& vn, \
                     const VRegister& vm) {                    \
    DCHECK(AS);                                                \
    NEON3DifferentL(vd, vn, vm, OP);                           \
  }
NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC
1514
1515 #define NEON_3DIFF_HN_LIST(V) \
1516 V(addhn, NEON_ADDHN, vd.IsD()) \
1517 V(addhn2, NEON_ADDHN2, vd.IsQ()) \
1518 V(raddhn, NEON_RADDHN, vd.IsD()) \
1519 V(raddhn2, NEON_RADDHN2, vd.IsQ()) \
1520 V(subhn, NEON_SUBHN, vd.IsD()) \
1521 V(subhn2, NEON_SUBHN2, vd.IsQ()) \
1522 V(rsubhn, NEON_RSUBHN, vd.IsD()) \
1523 V(rsubhn2, NEON_RSUBHN2, vd.IsQ())
1524
1525 #define DEFINE_ASM_FUNC(FN, OP, AS) \
1526 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
1527 const VRegister& vm) { \
1528 DCHECK(AS); \
1529 NEON3DifferentHN(vd, vn, vm, OP); \
1530 }
NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC)1531 NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC)
1532 #undef DEFINE_ASM_FUNC
1533
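// Vector permute instructions (TRN, UZP, ZIP). All operands must have the
// same vector format, and the 1D format is not supported.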
void Assembler::NEONPerm(const VRegister& vd, const VRegister& vn,
                         const VRegister& vm, NEONPermOp op) {
  DCHECK(AreSameFormat(vd, vn, vm));
  DCHECK(!vd.Is1D());
  Emit(VFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
}

void Assembler::trn1(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_TRN1);
}

void Assembler::trn2(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_TRN2);
}

void Assembler::uzp1(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_UZP1);
}

void Assembler::uzp2(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_UZP2);
}

void Assembler::zip1(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_ZIP1);
}

void Assembler::zip2(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_ZIP2);
}
1570
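// NEON shift-by-immediate instructions encode the shift amount together with
// the lane size in the immh:immb field: left shifts encode
// (lane_size_in_bits + shift), right shifts encode
// (2 * lane_size_in_bits - shift).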
void Assembler::NEONShiftImmediate(const VRegister& vd, const VRegister& vn,
                                   NEONShiftImmediateOp op, int immh_immb) {
  DCHECK(AreSameFormat(vd, vn));
  Instr q, scalar;
  if (vn.IsScalar()) {
    q = NEON_Q;
    scalar = NEONScalar;
  } else {
    q = vd.IsD() ? 0 : NEON_Q;
    scalar = 0;
  }
  Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
}

void Assembler::NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn,
                                       int shift, NEONShiftImmediateOp op) {
  int laneSizeInBits = vn.LaneSizeInBits();
  DCHECK((shift >= 0) && (shift < laneSizeInBits));
  NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16);
}

void Assembler::NEONShiftRightImmediate(const VRegister& vd,
                                        const VRegister& vn, int shift,
                                        NEONShiftImmediateOp op) {
  int laneSizeInBits = vn.LaneSizeInBits();
  DCHECK((shift >= 1) && (shift <= laneSizeInBits));
  NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16);
}

void Assembler::NEONShiftImmediateL(const VRegister& vd, const VRegister& vn,
                                    int shift, NEONShiftImmediateOp op) {
  int laneSizeInBits = vn.LaneSizeInBits();
  DCHECK((shift >= 0) && (shift < laneSizeInBits));
  int immh_immb = (laneSizeInBits + shift) << 16;

  DCHECK((vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
         (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
         (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
  Instr q = vn.IsD() ? 0 : NEON_Q;
  Emit(q | op | immh_immb | Rn(vn) | Rd(vd));
}

void Assembler::NEONShiftImmediateN(const VRegister& vd, const VRegister& vn,
                                    int shift, NEONShiftImmediateOp op) {
  Instr q, scalar;
  int laneSizeInBits = vd.LaneSizeInBits();
  DCHECK((shift >= 1) && (shift <= laneSizeInBits));
  int immh_immb = (2 * laneSizeInBits - shift) << 16;

  if (vn.IsScalar()) {
    DCHECK((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) ||
           (vd.Is1S() && vn.Is1D()));
    q = NEON_Q;
    scalar = NEONScalar;
  } else {
    DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
           (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
           (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
    scalar = 0;
    q = vd.IsD() ? 0 : NEON_Q;
  }
  Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
}
1635
shl(const VRegister & vd,const VRegister & vn,int shift)1636 void Assembler::shl(const VRegister& vd, const VRegister& vn, int shift) {
1637 DCHECK(vd.IsVector() || vd.Is1D());
1638 NEONShiftLeftImmediate(vd, vn, shift, NEON_SHL);
1639 }
1640
sli(const VRegister & vd,const VRegister & vn,int shift)1641 void Assembler::sli(const VRegister& vd, const VRegister& vn, int shift) {
1642 DCHECK(vd.IsVector() || vd.Is1D());
1643 NEONShiftLeftImmediate(vd, vn, shift, NEON_SLI);
1644 }
1645
sqshl(const VRegister & vd,const VRegister & vn,int shift)1646 void Assembler::sqshl(const VRegister& vd, const VRegister& vn, int shift) {
1647 NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHL_imm);
1648 }
1649
sqshlu(const VRegister & vd,const VRegister & vn,int shift)1650 void Assembler::sqshlu(const VRegister& vd, const VRegister& vn, int shift) {
1651 NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHLU);
1652 }
1653
uqshl(const VRegister & vd,const VRegister & vn,int shift)1654 void Assembler::uqshl(const VRegister& vd, const VRegister& vn, int shift) {
1655 NEONShiftLeftImmediate(vd, vn, shift, NEON_UQSHL_imm);
1656 }
1657
sshll(const VRegister & vd,const VRegister & vn,int shift)1658 void Assembler::sshll(const VRegister& vd, const VRegister& vn, int shift) {
1659 DCHECK(vn.IsD());
1660 NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
1661 }
1662
sshll2(const VRegister & vd,const VRegister & vn,int shift)1663 void Assembler::sshll2(const VRegister& vd, const VRegister& vn, int shift) {
1664 DCHECK(vn.IsQ());
1665 NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
1666 }
1667
sxtl(const VRegister & vd,const VRegister & vn)1668 void Assembler::sxtl(const VRegister& vd, const VRegister& vn) {
1669 sshll(vd, vn, 0);
1670 }
1671
sxtl2(const VRegister & vd,const VRegister & vn)1672 void Assembler::sxtl2(const VRegister& vd, const VRegister& vn) {
1673 sshll2(vd, vn, 0);
1674 }
1675
ushll(const VRegister & vd,const VRegister & vn,int shift)1676 void Assembler::ushll(const VRegister& vd, const VRegister& vn, int shift) {
1677 DCHECK(vn.IsD());
1678 NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
1679 }
1680
ushll2(const VRegister & vd,const VRegister & vn,int shift)1681 void Assembler::ushll2(const VRegister& vd, const VRegister& vn, int shift) {
1682 DCHECK(vn.IsQ());
1683 NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
1684 }
1685
uxtl(const VRegister & vd,const VRegister & vn)1686 void Assembler::uxtl(const VRegister& vd, const VRegister& vn) {
1687 ushll(vd, vn, 0);
1688 }
1689
uxtl2(const VRegister & vd,const VRegister & vn)1690 void Assembler::uxtl2(const VRegister& vd, const VRegister& vn) {
1691 ushll2(vd, vn, 0);
1692 }
1693
sri(const VRegister & vd,const VRegister & vn,int shift)1694 void Assembler::sri(const VRegister& vd, const VRegister& vn, int shift) {
1695 DCHECK(vd.IsVector() || vd.Is1D());
1696 NEONShiftRightImmediate(vd, vn, shift, NEON_SRI);
1697 }
1698
sshr(const VRegister & vd,const VRegister & vn,int shift)1699 void Assembler::sshr(const VRegister& vd, const VRegister& vn, int shift) {
1700 DCHECK(vd.IsVector() || vd.Is1D());
1701 NEONShiftRightImmediate(vd, vn, shift, NEON_SSHR);
1702 }
1703
ushr(const VRegister & vd,const VRegister & vn,int shift)1704 void Assembler::ushr(const VRegister& vd, const VRegister& vn, int shift) {
1705 DCHECK(vd.IsVector() || vd.Is1D());
1706 NEONShiftRightImmediate(vd, vn, shift, NEON_USHR);
1707 }
1708
srshr(const VRegister & vd,const VRegister & vn,int shift)1709 void Assembler::srshr(const VRegister& vd, const VRegister& vn, int shift) {
1710 DCHECK(vd.IsVector() || vd.Is1D());
1711 NEONShiftRightImmediate(vd, vn, shift, NEON_SRSHR);
1712 }
1713
urshr(const VRegister & vd,const VRegister & vn,int shift)1714 void Assembler::urshr(const VRegister& vd, const VRegister& vn, int shift) {
1715 DCHECK(vd.IsVector() || vd.Is1D());
1716 NEONShiftRightImmediate(vd, vn, shift, NEON_URSHR);
1717 }
1718
ssra(const VRegister & vd,const VRegister & vn,int shift)1719 void Assembler::ssra(const VRegister& vd, const VRegister& vn, int shift) {
1720 DCHECK(vd.IsVector() || vd.Is1D());
1721 NEONShiftRightImmediate(vd, vn, shift, NEON_SSRA);
1722 }
1723
usra(const VRegister & vd,const VRegister & vn,int shift)1724 void Assembler::usra(const VRegister& vd, const VRegister& vn, int shift) {
1725 DCHECK(vd.IsVector() || vd.Is1D());
1726 NEONShiftRightImmediate(vd, vn, shift, NEON_USRA);
1727 }
1728
srsra(const VRegister & vd,const VRegister & vn,int shift)1729 void Assembler::srsra(const VRegister& vd, const VRegister& vn, int shift) {
1730 DCHECK(vd.IsVector() || vd.Is1D());
1731 NEONShiftRightImmediate(vd, vn, shift, NEON_SRSRA);
1732 }
1733
ursra(const VRegister & vd,const VRegister & vn,int shift)1734 void Assembler::ursra(const VRegister& vd, const VRegister& vn, int shift) {
1735 DCHECK(vd.IsVector() || vd.Is1D());
1736 NEONShiftRightImmediate(vd, vn, shift, NEON_URSRA);
1737 }
1738
shrn(const VRegister & vd,const VRegister & vn,int shift)1739 void Assembler::shrn(const VRegister& vd, const VRegister& vn, int shift) {
1740 DCHECK(vn.IsVector() && vd.IsD());
1741 NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
1742 }
1743
shrn2(const VRegister & vd,const VRegister & vn,int shift)1744 void Assembler::shrn2(const VRegister& vd, const VRegister& vn, int shift) {
1745 DCHECK(vn.IsVector() && vd.IsQ());
1746 NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
1747 }
1748
rshrn(const VRegister & vd,const VRegister & vn,int shift)1749 void Assembler::rshrn(const VRegister& vd, const VRegister& vn, int shift) {
1750 DCHECK(vn.IsVector() && vd.IsD());
1751 NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
1752 }
1753
rshrn2(const VRegister & vd,const VRegister & vn,int shift)1754 void Assembler::rshrn2(const VRegister& vd, const VRegister& vn, int shift) {
1755 DCHECK(vn.IsVector() && vd.IsQ());
1756 NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
1757 }
1758
sqshrn(const VRegister & vd,const VRegister & vn,int shift)1759 void Assembler::sqshrn(const VRegister& vd, const VRegister& vn, int shift) {
1760 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1761 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
1762 }
1763
sqshrn2(const VRegister & vd,const VRegister & vn,int shift)1764 void Assembler::sqshrn2(const VRegister& vd, const VRegister& vn, int shift) {
1765 DCHECK(vn.IsVector() && vd.IsQ());
1766 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
1767 }
1768
sqrshrn(const VRegister & vd,const VRegister & vn,int shift)1769 void Assembler::sqrshrn(const VRegister& vd, const VRegister& vn, int shift) {
1770 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1771 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
1772 }
1773
sqrshrn2(const VRegister & vd,const VRegister & vn,int shift)1774 void Assembler::sqrshrn2(const VRegister& vd, const VRegister& vn, int shift) {
1775 DCHECK(vn.IsVector() && vd.IsQ());
1776 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
1777 }
1778
sqshrun(const VRegister & vd,const VRegister & vn,int shift)1779 void Assembler::sqshrun(const VRegister& vd, const VRegister& vn, int shift) {
1780 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1781 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
1782 }
1783
sqshrun2(const VRegister & vd,const VRegister & vn,int shift)1784 void Assembler::sqshrun2(const VRegister& vd, const VRegister& vn, int shift) {
1785 DCHECK(vn.IsVector() && vd.IsQ());
1786 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
1787 }
1788
sqrshrun(const VRegister & vd,const VRegister & vn,int shift)1789 void Assembler::sqrshrun(const VRegister& vd, const VRegister& vn, int shift) {
1790 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1791 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
1792 }
1793
sqrshrun2(const VRegister & vd,const VRegister & vn,int shift)1794 void Assembler::sqrshrun2(const VRegister& vd, const VRegister& vn, int shift) {
1795 DCHECK(vn.IsVector() && vd.IsQ());
1796 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
1797 }
1798
uqshrn(const VRegister & vd,const VRegister & vn,int shift)1799 void Assembler::uqshrn(const VRegister& vd, const VRegister& vn, int shift) {
1800 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1801 NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
1802 }
1803
uqshrn2(const VRegister & vd,const VRegister & vn,int shift)1804 void Assembler::uqshrn2(const VRegister& vd, const VRegister& vn, int shift) {
1805 DCHECK(vn.IsVector() && vd.IsQ());
1806 NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
1807 }
1808
uqrshrn(const VRegister & vd,const VRegister & vn,int shift)1809 void Assembler::uqrshrn(const VRegister& vd, const VRegister& vn, int shift) {
1810 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1811 NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
1812 }
1813
uqrshrn2(const VRegister & vd,const VRegister & vn,int shift)1814 void Assembler::uqrshrn2(const VRegister& vd, const VRegister& vn, int shift) {
1815 DCHECK(vn.IsVector() && vd.IsQ());
1816 NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
1817 }
1818
uaddw(const VRegister & vd,const VRegister & vn,const VRegister & vm)1819 void Assembler::uaddw(const VRegister& vd, const VRegister& vn,
1820 const VRegister& vm) {
1821 DCHECK(vm.IsD());
1822 NEON3DifferentW(vd, vn, vm, NEON_UADDW);
1823 }
1824
uaddw2(const VRegister & vd,const VRegister & vn,const VRegister & vm)1825 void Assembler::uaddw2(const VRegister& vd, const VRegister& vn,
1826 const VRegister& vm) {
1827 DCHECK(vm.IsQ());
1828 NEON3DifferentW(vd, vn, vm, NEON_UADDW2);
1829 }
1830
saddw(const VRegister & vd,const VRegister & vn,const VRegister & vm)1831 void Assembler::saddw(const VRegister& vd, const VRegister& vn,
1832 const VRegister& vm) {
1833 DCHECK(vm.IsD());
1834 NEON3DifferentW(vd, vn, vm, NEON_SADDW);
1835 }
1836
saddw2(const VRegister & vd,const VRegister & vn,const VRegister & vm)1837 void Assembler::saddw2(const VRegister& vd, const VRegister& vn,
1838 const VRegister& vm) {
1839 DCHECK(vm.IsQ());
1840 NEON3DifferentW(vd, vn, vm, NEON_SADDW2);
1841 }
1842
usubw(const VRegister & vd,const VRegister & vn,const VRegister & vm)1843 void Assembler::usubw(const VRegister& vd, const VRegister& vn,
1844 const VRegister& vm) {
1845 DCHECK(vm.IsD());
1846 NEON3DifferentW(vd, vn, vm, NEON_USUBW);
1847 }
1848
usubw2(const VRegister & vd,const VRegister & vn,const VRegister & vm)1849 void Assembler::usubw2(const VRegister& vd, const VRegister& vn,
1850 const VRegister& vm) {
1851 DCHECK(vm.IsQ());
1852 NEON3DifferentW(vd, vn, vm, NEON_USUBW2);
1853 }
1854
ssubw(const VRegister & vd,const VRegister & vn,const VRegister & vm)1855 void Assembler::ssubw(const VRegister& vd, const VRegister& vn,
1856 const VRegister& vm) {
1857 DCHECK(vm.IsD());
1858 NEON3DifferentW(vd, vn, vm, NEON_SSUBW);
1859 }
1860
ssubw2(const VRegister & vd,const VRegister & vn,const VRegister & vm)1861 void Assembler::ssubw2(const VRegister& vd, const VRegister& vn,
1862 const VRegister& vm) {
1863 DCHECK(vm.IsQ());
1864 NEON3DifferentW(vd, vn, vm, NEON_SSUBW2);
1865 }
1866
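// Register-to-register move. For example, mov(x0, sp) assembles as
// "add x0, sp, #0", while mov(x0, x1) assembles as "orr x0, xzr, x1".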
void Assembler::mov(const Register& rd, const Register& rm) {
  // Moves involving the stack pointer are encoded as add immediate with
  // second operand of zero. Otherwise, orr with first operand zr is
  // used.
  if (rd.IsSP() || rm.IsSP()) {
    add(rd, rm, 0);
  } else {
    orr(rd, AppropriateZeroRegFor(rd), rm);
  }
}
1877
ins(const VRegister & vd,int vd_index,const Register & rn)1878 void Assembler::ins(const VRegister& vd, int vd_index, const Register& rn) {
1879 // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
1880 // number of lanes, and T is b, h, s or d.
1881 int lane_size = vd.LaneSizeInBytes();
1882 NEONFormatField format;
1883 switch (lane_size) {
1884 case 1:
1885 format = NEON_16B;
1886 DCHECK(rn.IsW());
1887 break;
1888 case 2:
1889 format = NEON_8H;
1890 DCHECK(rn.IsW());
1891 break;
1892 case 4:
1893 format = NEON_4S;
1894 DCHECK(rn.IsW());
1895 break;
1896 default:
1897 DCHECK_EQ(lane_size, 8);
1898 DCHECK(rn.IsX());
1899 format = NEON_2D;
1900 break;
1901 }
1902
1903 DCHECK((0 <= vd_index) &&
1904 (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
1905 Emit(NEON_INS_GENERAL | ImmNEON5(format, vd_index) | Rn(rn) | Rd(vd));
1906 }
1907
mov(const Register & rd,const VRegister & vn,int vn_index)1908 void Assembler::mov(const Register& rd, const VRegister& vn, int vn_index) {
1909 DCHECK_GE(vn.SizeInBytes(), 4);
1910 umov(rd, vn, vn_index);
1911 }
1912
void Assembler::smov(const Register& rd, const VRegister& vn, int vn_index) {
  // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
  // number of lanes, and T is b, h or s.
  int lane_size = vn.LaneSizeInBytes();
  NEONFormatField format;
  Instr q = 0;
  switch (lane_size) {
    case 1:
      format = NEON_16B;
      break;
    case 2:
      format = NEON_8H;
      break;
    default:
      DCHECK_EQ(lane_size, 4);
      DCHECK(rd.IsX());
      format = NEON_4S;
      break;
  }
  q = rd.IsW() ? 0 : NEON_Q;
  DCHECK((0 <= vn_index) &&
         (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
  Emit(q | NEON_SMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
}
1937
cls(const VRegister & vd,const VRegister & vn)1938 void Assembler::cls(const VRegister& vd, const VRegister& vn) {
1939 DCHECK(AreSameFormat(vd, vn));
1940 DCHECK(!vd.Is1D() && !vd.Is2D());
1941 Emit(VFormat(vn) | NEON_CLS | Rn(vn) | Rd(vd));
1942 }
1943
clz(const VRegister & vd,const VRegister & vn)1944 void Assembler::clz(const VRegister& vd, const VRegister& vn) {
1945 DCHECK(AreSameFormat(vd, vn));
1946 DCHECK(!vd.Is1D() && !vd.Is2D());
1947 Emit(VFormat(vn) | NEON_CLZ | Rn(vn) | Rd(vd));
1948 }
1949
cnt(const VRegister & vd,const VRegister & vn)1950 void Assembler::cnt(const VRegister& vd, const VRegister& vn) {
1951 DCHECK(AreSameFormat(vd, vn));
1952 DCHECK(vd.Is8B() || vd.Is16B());
1953 Emit(VFormat(vn) | NEON_CNT | Rn(vn) | Rd(vd));
1954 }
1955
rev16(const VRegister & vd,const VRegister & vn)1956 void Assembler::rev16(const VRegister& vd, const VRegister& vn) {
1957 DCHECK(AreSameFormat(vd, vn));
1958 DCHECK(vd.Is8B() || vd.Is16B());
1959 Emit(VFormat(vn) | NEON_REV16 | Rn(vn) | Rd(vd));
1960 }
1961
rev32(const VRegister & vd,const VRegister & vn)1962 void Assembler::rev32(const VRegister& vd, const VRegister& vn) {
1963 DCHECK(AreSameFormat(vd, vn));
1964 DCHECK(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H());
1965 Emit(VFormat(vn) | NEON_REV32 | Rn(vn) | Rd(vd));
1966 }
1967
rev64(const VRegister & vd,const VRegister & vn)1968 void Assembler::rev64(const VRegister& vd, const VRegister& vn) {
1969 DCHECK(AreSameFormat(vd, vn));
1970 DCHECK(!vd.Is1D() && !vd.Is2D());
1971 Emit(VFormat(vn) | NEON_REV64 | Rn(vn) | Rd(vd));
1972 }
1973
ursqrte(const VRegister & vd,const VRegister & vn)1974 void Assembler::ursqrte(const VRegister& vd, const VRegister& vn) {
1975 DCHECK(AreSameFormat(vd, vn));
1976 DCHECK(vd.Is2S() || vd.Is4S());
1977 Emit(VFormat(vn) | NEON_URSQRTE | Rn(vn) | Rd(vd));
1978 }
1979
urecpe(const VRegister & vd,const VRegister & vn)1980 void Assembler::urecpe(const VRegister& vd, const VRegister& vn) {
1981 DCHECK(AreSameFormat(vd, vn));
1982 DCHECK(vd.Is2S() || vd.Is4S());
1983 Emit(VFormat(vn) | NEON_URECPE | Rn(vn) | Rd(vd));
1984 }
1985
NEONAddlp(const VRegister & vd,const VRegister & vn,NEON2RegMiscOp op)1986 void Assembler::NEONAddlp(const VRegister& vd, const VRegister& vn,
1987 NEON2RegMiscOp op) {
1988 DCHECK((op == NEON_SADDLP) || (op == NEON_UADDLP) || (op == NEON_SADALP) ||
1989 (op == NEON_UADALP));
1990
1991 DCHECK((vn.Is8B() && vd.Is4H()) || (vn.Is4H() && vd.Is2S()) ||
1992 (vn.Is2S() && vd.Is1D()) || (vn.Is16B() && vd.Is8H()) ||
1993 (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
1994 Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
1995 }
1996
saddlp(const VRegister & vd,const VRegister & vn)1997 void Assembler::saddlp(const VRegister& vd, const VRegister& vn) {
1998 NEONAddlp(vd, vn, NEON_SADDLP);
1999 }
2000
uaddlp(const VRegister & vd,const VRegister & vn)2001 void Assembler::uaddlp(const VRegister& vd, const VRegister& vn) {
2002 NEONAddlp(vd, vn, NEON_UADDLP);
2003 }
2004
sadalp(const VRegister & vd,const VRegister & vn)2005 void Assembler::sadalp(const VRegister& vd, const VRegister& vn) {
2006 NEONAddlp(vd, vn, NEON_SADALP);
2007 }
2008
uadalp(const VRegister & vd,const VRegister & vn)2009 void Assembler::uadalp(const VRegister& vd, const VRegister& vn) {
2010 NEONAddlp(vd, vn, NEON_UADALP);
2011 }
2012
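// Across-lanes reductions. The "L" variants produce a destination that is
// wider than the source lanes; the floating-point variants are only defined
// here for a 1S destination.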
void Assembler::NEONAcrossLanesL(const VRegister& vd, const VRegister& vn,
                                 NEONAcrossLanesOp op) {
  DCHECK((vn.Is8B() && vd.Is1H()) || (vn.Is16B() && vd.Is1H()) ||
         (vn.Is4H() && vd.Is1S()) || (vn.Is8H() && vd.Is1S()) ||
         (vn.Is4S() && vd.Is1D()));
  Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
}

void Assembler::saddlv(const VRegister& vd, const VRegister& vn) {
  NEONAcrossLanesL(vd, vn, NEON_SADDLV);
}

void Assembler::uaddlv(const VRegister& vd, const VRegister& vn) {
  NEONAcrossLanesL(vd, vn, NEON_UADDLV);
}

void Assembler::NEONAcrossLanes(const VRegister& vd, const VRegister& vn,
                                NEONAcrossLanesOp op) {
  DCHECK((vn.Is8B() && vd.Is1B()) || (vn.Is16B() && vd.Is1B()) ||
         (vn.Is4H() && vd.Is1H()) || (vn.Is8H() && vd.Is1H()) ||
         (vn.Is4S() && vd.Is1S()));
  if ((op & NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
    Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
  } else {
    Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
  }
}
2040
2041 #define NEON_ACROSSLANES_LIST(V) \
2042 V(fmaxv, NEON_FMAXV, vd.Is1S()) \
2043 V(fminv, NEON_FMINV, vd.Is1S()) \
2044 V(fmaxnmv, NEON_FMAXNMV, vd.Is1S()) \
2045 V(fminnmv, NEON_FMINNMV, vd.Is1S()) \
2046 V(addv, NEON_ADDV, true) \
2047 V(smaxv, NEON_SMAXV, true) \
2048 V(sminv, NEON_SMINV, true) \
2049 V(umaxv, NEON_UMAXV, true) \
2050 V(uminv, NEON_UMINV, true)
2051
2052 #define DEFINE_ASM_FUNC(FN, OP, AS) \
2053 void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
2054 DCHECK(AS); \
2055 NEONAcrossLanes(vd, vn, OP); \
2056 }
NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)2057 NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
2058 #undef DEFINE_ASM_FUNC
2059
2060 void Assembler::mov(const VRegister& vd, int vd_index, const Register& rn) {
2061 ins(vd, vd_index, rn);
2062 }
2063
umov(const Register & rd,const VRegister & vn,int vn_index)2064 void Assembler::umov(const Register& rd, const VRegister& vn, int vn_index) {
2065 // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
2066 // number of lanes, and T is b, h, s or d.
2067 int lane_size = vn.LaneSizeInBytes();
2068 NEONFormatField format;
2069 Instr q = 0;
2070 switch (lane_size) {
2071 case 1:
2072 format = NEON_16B;
2073 DCHECK(rd.IsW());
2074 break;
2075 case 2:
2076 format = NEON_8H;
2077 DCHECK(rd.IsW());
2078 break;
2079 case 4:
2080 format = NEON_4S;
2081 DCHECK(rd.IsW());
2082 break;
2083 default:
2084 DCHECK_EQ(lane_size, 8);
2085 DCHECK(rd.IsX());
2086 format = NEON_2D;
2087 q = NEON_Q;
2088 break;
2089 }
2090
2091 DCHECK((0 <= vn_index) &&
2092 (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
2093 Emit(q | NEON_UMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
2094 }
2095
mov(const VRegister & vd,const VRegister & vn,int vn_index)2096 void Assembler::mov(const VRegister& vd, const VRegister& vn, int vn_index) {
2097 DCHECK(vd.IsScalar());
2098 dup(vd, vn, vn_index);
2099 }
2100
dup(const VRegister & vd,const Register & rn)2101 void Assembler::dup(const VRegister& vd, const Register& rn) {
2102 DCHECK(!vd.Is1D());
2103 DCHECK_EQ(vd.Is2D(), rn.IsX());
2104 Instr q = vd.IsD() ? 0 : NEON_Q;
2105 Emit(q | NEON_DUP_GENERAL | ImmNEON5(VFormat(vd), 0) | Rn(rn) | Rd(vd));
2106 }
2107
ins(const VRegister & vd,int vd_index,const VRegister & vn,int vn_index)2108 void Assembler::ins(const VRegister& vd, int vd_index, const VRegister& vn,
2109 int vn_index) {
2110 DCHECK(AreSameFormat(vd, vn));
2111 // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
2112 // number of lanes, and T is b, h, s or d.
2113 int lane_size = vd.LaneSizeInBytes();
2114 NEONFormatField format;
2115 switch (lane_size) {
2116 case 1:
2117 format = NEON_16B;
2118 break;
2119 case 2:
2120 format = NEON_8H;
2121 break;
2122 case 4:
2123 format = NEON_4S;
2124 break;
2125 default:
2126 DCHECK_EQ(lane_size, 8);
2127 format = NEON_2D;
2128 break;
2129 }
2130
2131 DCHECK((0 <= vd_index) &&
2132 (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
2133 DCHECK((0 <= vn_index) &&
2134 (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
2135 Emit(NEON_INS_ELEMENT | ImmNEON5(format, vd_index) |
2136 ImmNEON4(format, vn_index) | Rn(vn) | Rd(vd));
2137 }
2138
NEONTable(const VRegister & vd,const VRegister & vn,const VRegister & vm,NEONTableOp op)2139 void Assembler::NEONTable(const VRegister& vd, const VRegister& vn,
2140 const VRegister& vm, NEONTableOp op) {
2141 DCHECK(vd.Is16B() || vd.Is8B());
2142 DCHECK(vn.Is16B());
2143 DCHECK(AreSameFormat(vd, vm));
2144 Emit(op | (vd.IsQ() ? NEON_Q : 0) | Rm(vm) | Rn(vn) | Rd(vd));
2145 }
2146
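// Table lookups. TBL writes zero for out-of-range indices, whereas TBX leaves
// the corresponding destination element unchanged. The multi-register forms
// require the table registers to be consecutive and of the same format.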
tbl(const VRegister & vd,const VRegister & vn,const VRegister & vm)2147 void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2148 const VRegister& vm) {
2149 NEONTable(vd, vn, vm, NEON_TBL_1v);
2150 }
2151
tbl(const VRegister & vd,const VRegister & vn,const VRegister & vn2,const VRegister & vm)2152 void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2153 const VRegister& vn2, const VRegister& vm) {
2154 USE(vn2);
2155 DCHECK(AreSameFormat(vn, vn2));
2156 DCHECK(AreConsecutive(vn, vn2));
2157 NEONTable(vd, vn, vm, NEON_TBL_2v);
2158 }
2159
tbl(const VRegister & vd,const VRegister & vn,const VRegister & vn2,const VRegister & vn3,const VRegister & vm)2160 void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2161 const VRegister& vn2, const VRegister& vn3,
2162 const VRegister& vm) {
2163 USE(vn2);
2164 USE(vn3);
2165 DCHECK(AreSameFormat(vn, vn2, vn3));
2166 DCHECK(AreConsecutive(vn, vn2, vn3));
2167 NEONTable(vd, vn, vm, NEON_TBL_3v);
2168 }
2169
tbl(const VRegister & vd,const VRegister & vn,const VRegister & vn2,const VRegister & vn3,const VRegister & vn4,const VRegister & vm)2170 void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2171 const VRegister& vn2, const VRegister& vn3,
2172 const VRegister& vn4, const VRegister& vm) {
2173 USE(vn2);
2174 USE(vn3);
2175 USE(vn4);
2176 DCHECK(AreSameFormat(vn, vn2, vn3, vn4));
2177 DCHECK(AreConsecutive(vn, vn2, vn3, vn4));
2178 NEONTable(vd, vn, vm, NEON_TBL_4v);
2179 }
2180
tbx(const VRegister & vd,const VRegister & vn,const VRegister & vm)2181 void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2182 const VRegister& vm) {
2183 NEONTable(vd, vn, vm, NEON_TBX_1v);
2184 }
2185
tbx(const VRegister & vd,const VRegister & vn,const VRegister & vn2,const VRegister & vm)2186 void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2187 const VRegister& vn2, const VRegister& vm) {
2188 USE(vn2);
2189 DCHECK(AreSameFormat(vn, vn2));
2190 DCHECK(AreConsecutive(vn, vn2));
2191 NEONTable(vd, vn, vm, NEON_TBX_2v);
2192 }
2193
tbx(const VRegister & vd,const VRegister & vn,const VRegister & vn2,const VRegister & vn3,const VRegister & vm)2194 void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2195 const VRegister& vn2, const VRegister& vn3,
2196 const VRegister& vm) {
2197 USE(vn2);
2198 USE(vn3);
2199 DCHECK(AreSameFormat(vn, vn2, vn3));
2200 DCHECK(AreConsecutive(vn, vn2, vn3));
2201 NEONTable(vd, vn, vm, NEON_TBX_3v);
2202 }
2203
tbx(const VRegister & vd,const VRegister & vn,const VRegister & vn2,const VRegister & vn3,const VRegister & vn4,const VRegister & vm)2204 void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2205 const VRegister& vn2, const VRegister& vn3,
2206 const VRegister& vn4, const VRegister& vm) {
2207 USE(vn2);
2208 USE(vn3);
2209 USE(vn4);
2210 DCHECK(AreSameFormat(vn, vn2, vn3, vn4));
2211 DCHECK(AreConsecutive(vn, vn2, vn3, vn4));
2212 NEONTable(vd, vn, vm, NEON_TBX_4v);
2213 }
2214
mov(const VRegister & vd,int vd_index,const VRegister & vn,int vn_index)2215 void Assembler::mov(const VRegister& vd, int vd_index, const VRegister& vn,
2216 int vn_index) {
2217 ins(vd, vd_index, vn, vn_index);
2218 }
2219
mvn(const Register & rd,const Operand & operand)2220 void Assembler::mvn(const Register& rd, const Operand& operand) {
2221 orn(rd, AppropriateZeroRegFor(rd), operand);
2222 }
2223
mrs(const Register & rt,SystemRegister sysreg)2224 void Assembler::mrs(const Register& rt, SystemRegister sysreg) {
2225 DCHECK(rt.Is64Bits());
2226 Emit(MRS | ImmSystemRegister(sysreg) | Rt(rt));
2227 }
2228
msr(SystemRegister sysreg,const Register & rt)2229 void Assembler::msr(SystemRegister sysreg, const Register& rt) {
2230 DCHECK(rt.Is64Bits());
2231 Emit(MSR | Rt(rt) | ImmSystemRegister(sysreg));
2232 }
2233
hint(SystemHint code)2234 void Assembler::hint(SystemHint code) { Emit(HINT | ImmHint(code) | Rt(xzr)); }
2235
2236 // NEON structure loads and stores.
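// Three addressing modes are supported: an immediate offset of zero,
// post-index by a register, and post-index by an immediate that is implied by
// the number of registers transferred (encoded with Rm = 31).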
Instr Assembler::LoadStoreStructAddrModeField(const MemOperand& addr) {
  Instr addr_field = RnSP(addr.base());

  if (addr.IsPostIndex()) {
    static_assert(NEONLoadStoreMultiStructPostIndex ==
                      static_cast<NEONLoadStoreMultiStructPostIndexOp>(
                          NEONLoadStoreSingleStructPostIndex),
                  "Opcodes must match for NEON post index memop.");

    addr_field |= NEONLoadStoreMultiStructPostIndex;
    if (addr.offset() == 0) {
      addr_field |= RmNot31(addr.regoffset());
    } else {
      // The immediate post index addressing mode is indicated by rm = 31.
      // The immediate is implied by the number of vector registers used.
      addr_field |= (0x1F << Rm_offset);
    }
  } else {
    DCHECK(addr.IsImmediateOffset() && (addr.offset() == 0));
  }
  return addr_field;
}
2259
LoadStoreStructVerify(const VRegister & vt,const MemOperand & addr,Instr op)2260 void Assembler::LoadStoreStructVerify(const VRegister& vt,
2261 const MemOperand& addr, Instr op) {
2262 #ifdef DEBUG
2263 // Assert that addressing mode is either offset (with immediate 0), post
2264 // index by immediate of the size of the register list, or post index by a
2265 // value in a core register.
2266 if (addr.IsImmediateOffset()) {
2267 DCHECK_EQ(addr.offset(), 0);
2268 } else {
2269 int offset = vt.SizeInBytes();
2270 switch (op) {
2271 case NEON_LD1_1v:
2272 case NEON_ST1_1v:
2273 offset *= 1;
2274 break;
2275 case NEONLoadStoreSingleStructLoad1:
2276 case NEONLoadStoreSingleStructStore1:
2277 case NEON_LD1R:
2278 offset = (offset / vt.LaneCount()) * 1;
2279 break;
2280
2281 case NEON_LD1_2v:
2282 case NEON_ST1_2v:
2283 case NEON_LD2:
2284 case NEON_ST2:
2285 offset *= 2;
2286 break;
2287 case NEONLoadStoreSingleStructLoad2:
2288 case NEONLoadStoreSingleStructStore2:
2289 case NEON_LD2R:
2290 offset = (offset / vt.LaneCount()) * 2;
2291 break;
2292
2293 case NEON_LD1_3v:
2294 case NEON_ST1_3v:
2295 case NEON_LD3:
2296 case NEON_ST3:
2297 offset *= 3;
2298 break;
2299 case NEONLoadStoreSingleStructLoad3:
2300 case NEONLoadStoreSingleStructStore3:
2301 case NEON_LD3R:
2302 offset = (offset / vt.LaneCount()) * 3;
2303 break;
2304
2305 case NEON_LD1_4v:
2306 case NEON_ST1_4v:
2307 case NEON_LD4:
2308 case NEON_ST4:
2309 offset *= 4;
2310 break;
2311 case NEONLoadStoreSingleStructLoad4:
2312 case NEONLoadStoreSingleStructStore4:
2313 case NEON_LD4R:
2314 offset = (offset / vt.LaneCount()) * 4;
2315 break;
2316 default:
2317 UNREACHABLE();
2318 }
2319 DCHECK(addr.regoffset() != NoReg || addr.offset() == offset);
2320 }
2321 #else
2322 USE(vt);
2323 USE(addr);
2324 USE(op);
2325 #endif
2326 }
2327
LoadStoreStruct(const VRegister & vt,const MemOperand & addr,NEONLoadStoreMultiStructOp op)2328 void Assembler::LoadStoreStruct(const VRegister& vt, const MemOperand& addr,
2329 NEONLoadStoreMultiStructOp op) {
2330 LoadStoreStructVerify(vt, addr, op);
2331 DCHECK(vt.IsVector() || vt.Is1D());
2332 Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
2333 }
2334
LoadStoreStructSingleAllLanes(const VRegister & vt,const MemOperand & addr,NEONLoadStoreSingleStructOp op)2335 void Assembler::LoadStoreStructSingleAllLanes(const VRegister& vt,
2336 const MemOperand& addr,
2337 NEONLoadStoreSingleStructOp op) {
2338 LoadStoreStructVerify(vt, addr, op);
2339 Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
2340 }
2341
ld1(const VRegister & vt,const MemOperand & src)2342 void Assembler::ld1(const VRegister& vt, const MemOperand& src) {
2343 LoadStoreStruct(vt, src, NEON_LD1_1v);
2344 }
2345
ld1(const VRegister & vt,const VRegister & vt2,const MemOperand & src)2346 void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
2347 const MemOperand& src) {
2348 USE(vt2);
2349 DCHECK(AreSameFormat(vt, vt2));
2350 DCHECK(AreConsecutive(vt, vt2));
2351 LoadStoreStruct(vt, src, NEON_LD1_2v);
2352 }
2353
ld1(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const MemOperand & src)2354 void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
2355 const VRegister& vt3, const MemOperand& src) {
2356 USE(vt2);
2357 USE(vt3);
2358 DCHECK(AreSameFormat(vt, vt2, vt3));
2359 DCHECK(AreConsecutive(vt, vt2, vt3));
2360 LoadStoreStruct(vt, src, NEON_LD1_3v);
2361 }
2362
ld1(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const VRegister & vt4,const MemOperand & src)2363 void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
2364 const VRegister& vt3, const VRegister& vt4,
2365 const MemOperand& src) {
2366 USE(vt2);
2367 USE(vt3);
2368 USE(vt4);
2369 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2370 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2371 LoadStoreStruct(vt, src, NEON_LD1_4v);
2372 }
2373
ld2(const VRegister & vt,const VRegister & vt2,const MemOperand & src)2374 void Assembler::ld2(const VRegister& vt, const VRegister& vt2,
2375 const MemOperand& src) {
2376 USE(vt2);
2377 DCHECK(AreSameFormat(vt, vt2));
2378 DCHECK(AreConsecutive(vt, vt2));
2379 LoadStoreStruct(vt, src, NEON_LD2);
2380 }
2381
ld2(const VRegister & vt,const VRegister & vt2,int lane,const MemOperand & src)2382 void Assembler::ld2(const VRegister& vt, const VRegister& vt2, int lane,
2383 const MemOperand& src) {
2384 USE(vt2);
2385 DCHECK(AreSameFormat(vt, vt2));
2386 DCHECK(AreConsecutive(vt, vt2));
2387 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad2);
2388 }
2389
ld2r(const VRegister & vt,const VRegister & vt2,const MemOperand & src)2390 void Assembler::ld2r(const VRegister& vt, const VRegister& vt2,
2391 const MemOperand& src) {
2392 USE(vt2);
2393 DCHECK(AreSameFormat(vt, vt2));
2394 DCHECK(AreConsecutive(vt, vt2));
2395 LoadStoreStructSingleAllLanes(vt, src, NEON_LD2R);
2396 }
2397
ld3(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const MemOperand & src)2398 void Assembler::ld3(const VRegister& vt, const VRegister& vt2,
2399 const VRegister& vt3, const MemOperand& src) {
2400 USE(vt2);
2401 USE(vt3);
2402 DCHECK(AreSameFormat(vt, vt2, vt3));
2403 DCHECK(AreConsecutive(vt, vt2, vt3));
2404 LoadStoreStruct(vt, src, NEON_LD3);
2405 }
2406
ld3(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,int lane,const MemOperand & src)2407 void Assembler::ld3(const VRegister& vt, const VRegister& vt2,
2408 const VRegister& vt3, int lane, const MemOperand& src) {
2409 USE(vt2);
2410 USE(vt3);
2411 DCHECK(AreSameFormat(vt, vt2, vt3));
2412 DCHECK(AreConsecutive(vt, vt2, vt3));
2413 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad3);
2414 }
2415
ld3r(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const MemOperand & src)2416 void Assembler::ld3r(const VRegister& vt, const VRegister& vt2,
2417 const VRegister& vt3, const MemOperand& src) {
2418 USE(vt2);
2419 USE(vt3);
2420 DCHECK(AreSameFormat(vt, vt2, vt3));
2421 DCHECK(AreConsecutive(vt, vt2, vt3));
2422 LoadStoreStructSingleAllLanes(vt, src, NEON_LD3R);
2423 }
2424
ld4(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const VRegister & vt4,const MemOperand & src)2425 void Assembler::ld4(const VRegister& vt, const VRegister& vt2,
2426 const VRegister& vt3, const VRegister& vt4,
2427 const MemOperand& src) {
2428 USE(vt2);
2429 USE(vt3);
2430 USE(vt4);
2431 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2432 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2433 LoadStoreStruct(vt, src, NEON_LD4);
2434 }
2435
ld4(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const VRegister & vt4,int lane,const MemOperand & src)2436 void Assembler::ld4(const VRegister& vt, const VRegister& vt2,
2437 const VRegister& vt3, const VRegister& vt4, int lane,
2438 const MemOperand& src) {
2439 USE(vt2);
2440 USE(vt3);
2441 USE(vt4);
2442 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2443 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2444 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad4);
2445 }
2446
ld4r(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const VRegister & vt4,const MemOperand & src)2447 void Assembler::ld4r(const VRegister& vt, const VRegister& vt2,
2448 const VRegister& vt3, const VRegister& vt4,
2449 const MemOperand& src) {
2450 USE(vt2);
2451 USE(vt3);
2452 USE(vt4);
2453 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2454 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2455 LoadStoreStructSingleAllLanes(vt, src, NEON_LD4R);
2456 }
2457
st1(const VRegister & vt,const MemOperand & src)2458 void Assembler::st1(const VRegister& vt, const MemOperand& src) {
2459 LoadStoreStruct(vt, src, NEON_ST1_1v);
2460 }
2461
st1(const VRegister & vt,const VRegister & vt2,const MemOperand & src)2462 void Assembler::st1(const VRegister& vt, const VRegister& vt2,
2463 const MemOperand& src) {
2464 USE(vt2);
2465 DCHECK(AreSameFormat(vt, vt2));
2466 DCHECK(AreConsecutive(vt, vt2));
2467 LoadStoreStruct(vt, src, NEON_ST1_2v);
2468 }
2469
st1(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const MemOperand & src)2470 void Assembler::st1(const VRegister& vt, const VRegister& vt2,
2471 const VRegister& vt3, const MemOperand& src) {
2472 USE(vt2);
2473 USE(vt3);
2474 DCHECK(AreSameFormat(vt, vt2, vt3));
2475 DCHECK(AreConsecutive(vt, vt2, vt3));
2476 LoadStoreStruct(vt, src, NEON_ST1_3v);
2477 }
2478
st1(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const VRegister & vt4,const MemOperand & src)2479 void Assembler::st1(const VRegister& vt, const VRegister& vt2,
2480 const VRegister& vt3, const VRegister& vt4,
2481 const MemOperand& src) {
2482 USE(vt2);
2483 USE(vt3);
2484 USE(vt4);
2485 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2486 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2487 LoadStoreStruct(vt, src, NEON_ST1_4v);
2488 }
2489
st2(const VRegister & vt,const VRegister & vt2,const MemOperand & dst)2490 void Assembler::st2(const VRegister& vt, const VRegister& vt2,
2491 const MemOperand& dst) {
2492 USE(vt2);
2493 DCHECK(AreSameFormat(vt, vt2));
2494 DCHECK(AreConsecutive(vt, vt2));
2495 LoadStoreStruct(vt, dst, NEON_ST2);
2496 }
2497
st2(const VRegister & vt,const VRegister & vt2,int lane,const MemOperand & dst)2498 void Assembler::st2(const VRegister& vt, const VRegister& vt2, int lane,
2499 const MemOperand& dst) {
2500 USE(vt2);
2501 DCHECK(AreSameFormat(vt, vt2));
2502 DCHECK(AreConsecutive(vt, vt2));
2503 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore2);
2504 }
2505
st3(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const MemOperand & dst)2506 void Assembler::st3(const VRegister& vt, const VRegister& vt2,
2507 const VRegister& vt3, const MemOperand& dst) {
2508 USE(vt2);
2509 USE(vt3);
2510 DCHECK(AreSameFormat(vt, vt2, vt3));
2511 DCHECK(AreConsecutive(vt, vt2, vt3));
2512 LoadStoreStruct(vt, dst, NEON_ST3);
2513 }
2514
st3(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,int lane,const MemOperand & dst)2515 void Assembler::st3(const VRegister& vt, const VRegister& vt2,
2516 const VRegister& vt3, int lane, const MemOperand& dst) {
2517 USE(vt2);
2518 USE(vt3);
2519 DCHECK(AreSameFormat(vt, vt2, vt3));
2520 DCHECK(AreConsecutive(vt, vt2, vt3));
2521 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore3);
2522 }
2523
st4(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const VRegister & vt4,const MemOperand & dst)2524 void Assembler::st4(const VRegister& vt, const VRegister& vt2,
2525 const VRegister& vt3, const VRegister& vt4,
2526 const MemOperand& dst) {
2527 USE(vt2);
2528 USE(vt3);
2529 USE(vt4);
2530 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2531 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2532 LoadStoreStruct(vt, dst, NEON_ST4);
2533 }
2534
st4(const VRegister & vt,const VRegister & vt2,const VRegister & vt3,const VRegister & vt4,int lane,const MemOperand & dst)2535 void Assembler::st4(const VRegister& vt, const VRegister& vt2,
2536 const VRegister& vt3, const VRegister& vt4, int lane,
2537 const MemOperand& dst) {
2538 USE(vt2);
2539 USE(vt3);
2540 USE(vt4);
2541 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2542 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2543 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore4);
2544 }
2545
LoadStoreStructSingle(const VRegister & vt,uint32_t lane,const MemOperand & addr,NEONLoadStoreSingleStructOp op)2546 void Assembler::LoadStoreStructSingle(const VRegister& vt, uint32_t lane,
2547 const MemOperand& addr,
2548 NEONLoadStoreSingleStructOp op) {
2549 LoadStoreStructVerify(vt, addr, op);
2550
2551 // We support vt arguments of the form vt.VxT() or vt.T(), where x is the
2552 // number of lanes, and T is b, h, s or d.
2553 unsigned lane_size = vt.LaneSizeInBytes();
2554 DCHECK_LT(lane, kQRegSize / lane_size);
2555
2556 // Lane size is encoded in the opcode field. Lane index is encoded in the Q,
2557 // S and size fields.
2558 lane *= lane_size;
2559
2560 // Encodings for S[0]/D[0] and S[2]/D[1] are distinguished using the least-
2561 // significant bit of the size field, so we increment lane here to account for
2562 // that.
2563 if (lane_size == 8) lane++;
2564
2565 Instr size = (lane << NEONLSSize_offset) & NEONLSSize_mask;
2566 Instr s = (lane << (NEONS_offset - 2)) & NEONS_mask;
2567 Instr q = (lane << (NEONQ_offset - 3)) & NEONQ_mask;
2568
2569 Instr instr = op;
2570 switch (lane_size) {
2571 case 1:
2572 instr |= NEONLoadStoreSingle_b;
2573 break;
2574 case 2:
2575 instr |= NEONLoadStoreSingle_h;
2576 break;
2577 case 4:
2578 instr |= NEONLoadStoreSingle_s;
2579 break;
2580 default:
2581 DCHECK_EQ(lane_size, 8U);
2582 instr |= NEONLoadStoreSingle_d;
2583 }
2584
2585 Emit(instr | LoadStoreStructAddrModeField(addr) | q | size | s | Rt(vt));
2586 }
2587
ld1(const VRegister & vt,int lane,const MemOperand & src)2588 void Assembler::ld1(const VRegister& vt, int lane, const MemOperand& src) {
2589 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad1);
2590 }
2591
ld1r(const VRegister & vt,const MemOperand & src)2592 void Assembler::ld1r(const VRegister& vt, const MemOperand& src) {
2593 LoadStoreStructSingleAllLanes(vt, src, NEON_LD1R);
2594 }
2595
st1(const VRegister & vt,int lane,const MemOperand & dst)2596 void Assembler::st1(const VRegister& vt, int lane, const MemOperand& dst) {
2597 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore1);
2598 }
2599
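// Memory barriers. DMB and DSB take an explicit domain and barrier type; ISB
// always applies to the full system.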
void Assembler::dmb(BarrierDomain domain, BarrierType type) {
  Emit(DMB | ImmBarrierDomain(domain) | ImmBarrierType(type));
}

void Assembler::dsb(BarrierDomain domain, BarrierType type) {
  Emit(DSB | ImmBarrierDomain(domain) | ImmBarrierType(type));
}

void Assembler::isb() {
  Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll));
}

void Assembler::csdb() { hint(CSDB); }
2613
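// Floating-point immediate moves. The scalar forms use FMOV with an 8-bit
// encoded immediate; the vector forms use the NEON MOVI encoding with
// cmode 0xF.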
void Assembler::fmov(const VRegister& vd, double imm) {
  if (vd.IsScalar()) {
    DCHECK(vd.Is1D());
    Emit(FMOV_d_imm | Rd(vd) | ImmFP(imm));
  } else {
    DCHECK(vd.Is2D());
    Instr op = NEONModifiedImmediate_MOVI | NEONModifiedImmediateOpBit;
    Emit(NEON_Q | op | ImmNEONFP(imm) | NEONCmode(0xF) | Rd(vd));
  }
}

void Assembler::fmov(const VRegister& vd, float imm) {
  if (vd.IsScalar()) {
    DCHECK(vd.Is1S());
    Emit(FMOV_s_imm | Rd(vd) | ImmFP(imm));
  } else {
    DCHECK(vd.Is2S() || vd.Is4S());
    Instr op = NEONModifiedImmediate_MOVI;
    Instr q = vd.Is4S() ? NEON_Q : 0;
    Emit(q | op | ImmNEONFP(imm) | NEONCmode(0xF) | Rd(vd));
  }
}
2636
fmov(const Register & rd,const VRegister & fn)2637 void Assembler::fmov(const Register& rd, const VRegister& fn) {
2638 DCHECK_EQ(rd.SizeInBits(), fn.SizeInBits());
2639 FPIntegerConvertOp op = rd.Is32Bits() ? FMOV_ws : FMOV_xd;
2640 Emit(op | Rd(rd) | Rn(fn));
2641 }
2642
fmov(const VRegister & vd,const Register & rn)2643 void Assembler::fmov(const VRegister& vd, const Register& rn) {
2644 DCHECK_EQ(vd.SizeInBits(), rn.SizeInBits());
2645 FPIntegerConvertOp op = vd.Is32Bits() ? FMOV_sw : FMOV_dx;
2646 Emit(op | Rd(vd) | Rn(rn));
2647 }
2648
fmov(const VRegister & vd,const VRegister & vn)2649 void Assembler::fmov(const VRegister& vd, const VRegister& vn) {
2650 DCHECK_EQ(vd.SizeInBits(), vn.SizeInBits());
2651 Emit(FPType(vd) | FMOV | Rd(vd) | Rn(vn));
2652 }
2653
fmov(const VRegister & vd,int index,const Register & rn)2654 void Assembler::fmov(const VRegister& vd, int index, const Register& rn) {
2655 DCHECK((index == 1) && vd.Is1D() && rn.IsX());
2656 USE(index);
2657 Emit(FMOV_d1_x | Rd(vd) | Rn(rn));
2658 }
2659
fmov(const Register & rd,const VRegister & vn,int index)2660 void Assembler::fmov(const Register& rd, const VRegister& vn, int index) {
2661 DCHECK((index == 1) && vn.Is1D() && rd.IsX());
2662 USE(index);
2663 Emit(FMOV_x_d1 | Rd(rd) | Rn(vn));
2664 }
2665
fmadd(const VRegister & fd,const VRegister & fn,const VRegister & fm,const VRegister & fa)2666 void Assembler::fmadd(const VRegister& fd, const VRegister& fn,
2667 const VRegister& fm, const VRegister& fa) {
2668 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMADD_s : FMADD_d);
2669 }
2670
fmsub(const VRegister & fd,const VRegister & fn,const VRegister & fm,const VRegister & fa)2671 void Assembler::fmsub(const VRegister& fd, const VRegister& fn,
2672 const VRegister& fm, const VRegister& fa) {
2673 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMSUB_s : FMSUB_d);
2674 }
2675
fnmadd(const VRegister & fd,const VRegister & fn,const VRegister & fm,const VRegister & fa)2676 void Assembler::fnmadd(const VRegister& fd, const VRegister& fn,
2677 const VRegister& fm, const VRegister& fa) {
2678 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMADD_s : FNMADD_d);
2679 }
2680
fnmsub(const VRegister & fd,const VRegister & fn,const VRegister & fm,const VRegister & fa)2681 void Assembler::fnmsub(const VRegister& fd, const VRegister& fn,
2682 const VRegister& fm, const VRegister& fa) {
2683 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMSUB_s : FNMSUB_d);
2684 }
2685
fnmul(const VRegister & vd,const VRegister & vn,const VRegister & vm)2686 void Assembler::fnmul(const VRegister& vd, const VRegister& vn,
2687 const VRegister& vm) {
2688 DCHECK(AreSameSizeAndType(vd, vn, vm));
2689 Instr op = vd.Is1S() ? FNMUL_s : FNMUL_d;
2690 Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
2691 }
2692
fcmp(const VRegister & fn,const VRegister & fm)2693 void Assembler::fcmp(const VRegister& fn, const VRegister& fm) {
2694 DCHECK_EQ(fn.SizeInBits(), fm.SizeInBits());
2695 Emit(FPType(fn) | FCMP | Rm(fm) | Rn(fn));
2696 }
2697
fcmp(const VRegister & fn,double value)2698 void Assembler::fcmp(const VRegister& fn, double value) {
2699 USE(value);
2700 // Although the fcmp instruction can strictly only take an immediate value of
2701 // +0.0, we don't need to check for -0.0 because the sign of 0.0 doesn't
2702 // affect the result of the comparison.
2703 DCHECK_EQ(value, 0.0);
2704 Emit(FPType(fn) | FCMP_zero | Rn(fn));
2705 }
2706
fccmp(const VRegister & fn,const VRegister & fm,StatusFlags nzcv,Condition cond)2707 void Assembler::fccmp(const VRegister& fn, const VRegister& fm,
2708 StatusFlags nzcv, Condition cond) {
2709 DCHECK_EQ(fn.SizeInBits(), fm.SizeInBits());
2710 Emit(FPType(fn) | FCCMP | Rm(fm) | Cond(cond) | Rn(fn) | Nzcv(nzcv));
2711 }
2712
fcsel(const VRegister & fd,const VRegister & fn,const VRegister & fm,Condition cond)2713 void Assembler::fcsel(const VRegister& fd, const VRegister& fn,
2714 const VRegister& fm, Condition cond) {
2715 DCHECK_EQ(fd.SizeInBits(), fn.SizeInBits());
2716 DCHECK_EQ(fd.SizeInBits(), fm.SizeInBits());
2717 Emit(FPType(fd) | FCSEL | Rm(fm) | Cond(cond) | Rn(fn) | Rd(fd));
2718 }
2719
NEONFPConvertToInt(const Register & rd,const VRegister & vn,Instr op)2720 void Assembler::NEONFPConvertToInt(const Register& rd, const VRegister& vn,
2721 Instr op) {
2722 Emit(SF(rd) | FPType(vn) | op | Rn(vn) | Rd(rd));
2723 }
2724
NEONFPConvertToInt(const VRegister & vd,const VRegister & vn,Instr op)2725 void Assembler::NEONFPConvertToInt(const VRegister& vd, const VRegister& vn,
2726 Instr op) {
2727 if (vn.IsScalar()) {
2728 DCHECK((vd.Is1S() && vn.Is1S()) || (vd.Is1D() && vn.Is1D()));
2729 op |= NEON_Q | NEONScalar;
2730 }
2731 Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
2732 }
2733
fcvt(const VRegister & vd,const VRegister & vn)2734 void Assembler::fcvt(const VRegister& vd, const VRegister& vn) {
2735 FPDataProcessing1SourceOp op;
2736 if (vd.Is1D()) {
2737 DCHECK(vn.Is1S() || vn.Is1H());
2738 op = vn.Is1S() ? FCVT_ds : FCVT_dh;
2739 } else if (vd.Is1S()) {
2740 DCHECK(vn.Is1D() || vn.Is1H());
2741 op = vn.Is1D() ? FCVT_sd : FCVT_sh;
2742 } else {
2743 DCHECK(vd.Is1H());
2744 DCHECK(vn.Is1D() || vn.Is1S());
2745 op = vn.Is1D() ? FCVT_hd : FCVT_hs;
2746 }
2747 FPDataProcessing1Source(vd, vn, op);
2748 }
2749
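// Vector floating-point widening (FCVTL/FCVTL2) and narrowing (FCVTN/FCVTN2)
// conversions; the "2" forms operate on the upper half of the source or
// destination register.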
fcvtl(const VRegister & vd,const VRegister & vn)2750 void Assembler::fcvtl(const VRegister& vd, const VRegister& vn) {
2751 DCHECK((vd.Is4S() && vn.Is4H()) || (vd.Is2D() && vn.Is2S()));
2752 Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
2753 Emit(format | NEON_FCVTL | Rn(vn) | Rd(vd));
2754 }
2755
2756 void Assembler::fcvtl2(const VRegister& vd, const VRegister& vn) {
2757 DCHECK((vd.Is4S() && vn.Is8H()) || (vd.Is2D() && vn.Is4S()));
2758 Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
2759 Emit(NEON_Q | format | NEON_FCVTL | Rn(vn) | Rd(vd));
2760 }
2761
2762 void Assembler::fcvtn(const VRegister& vd, const VRegister& vn) {
2763 DCHECK((vn.Is4S() && vd.Is4H()) || (vn.Is2D() && vd.Is2S()));
2764 Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
2765 Emit(format | NEON_FCVTN | Rn(vn) | Rd(vd));
2766 }
2767
2768 void Assembler::fcvtn2(const VRegister& vd, const VRegister& vn) {
2769 DCHECK((vn.Is4S() && vd.Is8H()) || (vn.Is2D() && vd.Is4S()));
2770 Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
2771 Emit(NEON_Q | format | NEON_FCVTN | Rn(vn) | Rd(vd));
2772 }
2773
2774 void Assembler::fcvtxn(const VRegister& vd, const VRegister& vn) {
2775 Instr format = 1 << NEONSize_offset;
2776 if (vd.IsScalar()) {
2777 DCHECK(vd.Is1S() && vn.Is1D());
2778 Emit(format | NEON_FCVTXN_scalar | Rn(vn) | Rd(vd));
2779 } else {
2780 DCHECK(vd.Is2S() && vn.Is2D());
2781 Emit(format | NEON_FCVTXN | Rn(vn) | Rd(vd));
2782 }
2783 }
2784
2785 void Assembler::fcvtxn2(const VRegister& vd, const VRegister& vn) {
2786 DCHECK(vd.Is4S() && vn.Is2D());
2787 Instr format = 1 << NEONSize_offset;
2788 Emit(NEON_Q | format | NEON_FCVTXN | Rn(vn) | Rd(vd));
2789 }
2790
2791 void Assembler::fjcvtzs(const Register& rd, const VRegister& vn) {
2792 DCHECK(rd.IsW() && vn.Is1D());
2793 Emit(FJCVTZS | Rn(vn) | Rd(rd));
2794 }
2795
2796 #define NEON_FP2REGMISC_FCVT_LIST(V) \
2797 V(fcvtnu, NEON_FCVTNU, FCVTNU) \
2798 V(fcvtns, NEON_FCVTNS, FCVTNS) \
2799 V(fcvtpu, NEON_FCVTPU, FCVTPU) \
2800 V(fcvtps, NEON_FCVTPS, FCVTPS) \
2801 V(fcvtmu, NEON_FCVTMU, FCVTMU) \
2802 V(fcvtms, NEON_FCVTMS, FCVTMS) \
2803 V(fcvtau, NEON_FCVTAU, FCVTAU) \
2804 V(fcvtas, NEON_FCVTAS, FCVTAS)
2805
2806 #define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \
2807 void Assembler::FN(const Register& rd, const VRegister& vn) { \
2808 NEONFPConvertToInt(rd, vn, SCA_OP); \
2809 } \
2810 void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
2811 NEONFPConvertToInt(vd, vn, VEC_OP); \
2812 }
2813 NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS)
2814 #undef DEFINE_ASM_FUNCS
2815
2816 void Assembler::scvtf(const VRegister& vd, const VRegister& vn, int fbits) {
2817 DCHECK_GE(fbits, 0);
2818 if (fbits == 0) {
2819 NEONFP2RegMisc(vd, vn, NEON_SCVTF);
2820 } else {
2821 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
2822 NEONShiftRightImmediate(vd, vn, fbits, NEON_SCVTF_imm);
2823 }
2824 }
2825
2826 void Assembler::ucvtf(const VRegister& vd, const VRegister& vn, int fbits) {
2827 DCHECK_GE(fbits, 0);
2828 if (fbits == 0) {
2829 NEONFP2RegMisc(vd, vn, NEON_UCVTF);
2830 } else {
2831 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
2832 NEONShiftRightImmediate(vd, vn, fbits, NEON_UCVTF_imm);
2833 }
2834 }
2835
2836 void Assembler::scvtf(const VRegister& vd, const Register& rn, int fbits) {
2837 DCHECK_GE(fbits, 0);
2838 if (fbits == 0) {
2839 Emit(SF(rn) | FPType(vd) | SCVTF | Rn(rn) | Rd(vd));
2840 } else {
2841 Emit(SF(rn) | FPType(vd) | SCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
2842 Rd(vd));
2843 }
2844 }
2845
2846 void Assembler::ucvtf(const VRegister& fd, const Register& rn, int fbits) {
2847 DCHECK_GE(fbits, 0);
2848 if (fbits == 0) {
2849 Emit(SF(rn) | FPType(fd) | UCVTF | Rn(rn) | Rd(fd));
2850 } else {
2851 Emit(SF(rn) | FPType(fd) | UCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
2852 Rd(fd));
2853 }
2854 }
2855
2856 void Assembler::NEON3Same(const VRegister& vd, const VRegister& vn,
2857 const VRegister& vm, NEON3SameOp vop) {
2858 DCHECK(AreSameFormat(vd, vn, vm));
2859 DCHECK(vd.IsVector() || !vd.IsQ());
2860
2861 Instr format, op = vop;
2862 if (vd.IsScalar()) {
2863 op |= NEON_Q | NEONScalar;
2864 format = SFormat(vd);
2865 } else {
2866 format = VFormat(vd);
2867 }
2868
2869 Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
2870 }
2871
2872 void Assembler::NEONFP3Same(const VRegister& vd, const VRegister& vn,
2873 const VRegister& vm, Instr op) {
2874 DCHECK(AreSameFormat(vd, vn, vm));
2875 Emit(FPFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
2876 }
2877
2878 #define NEON_FP2REGMISC_LIST(V) \
2879 V(fabs, NEON_FABS, FABS) \
2880 V(fneg, NEON_FNEG, FNEG) \
2881 V(fsqrt, NEON_FSQRT, FSQRT) \
2882 V(frintn, NEON_FRINTN, FRINTN) \
2883 V(frinta, NEON_FRINTA, FRINTA) \
2884 V(frintp, NEON_FRINTP, FRINTP) \
2885 V(frintm, NEON_FRINTM, FRINTM) \
2886 V(frintx, NEON_FRINTX, FRINTX) \
2887 V(frintz, NEON_FRINTZ, FRINTZ) \
2888 V(frinti, NEON_FRINTI, FRINTI) \
2889 V(frsqrte, NEON_FRSQRTE, NEON_FRSQRTE_scalar) \
2890 V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar)
2891
2892 #define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
2893 void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
2894 Instr op; \
2895 if (vd.IsScalar()) { \
2896 DCHECK(vd.Is1S() || vd.Is1D()); \
2897 op = SCA_OP; \
2898 } else { \
2899 DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
2900 op = VEC_OP; \
2901 } \
2902 NEONFP2RegMisc(vd, vn, op); \
2903 }
2904 NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
2905 #undef DEFINE_ASM_FUNC
2906
2907 void Assembler::shll(const VRegister& vd, const VRegister& vn, int shift) {
2908 DCHECK((vd.Is8H() && vn.Is8B() && shift == 8) ||
2909 (vd.Is4S() && vn.Is4H() && shift == 16) ||
2910 (vd.Is2D() && vn.Is2S() && shift == 32));
2911 USE(shift);
2912 Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
2913 }
2914
2915 void Assembler::shll2(const VRegister& vd, const VRegister& vn, int shift) {
2916 USE(shift);
2917 DCHECK((vd.Is8H() && vn.Is16B() && shift == 8) ||
2918 (vd.Is4S() && vn.Is8H() && shift == 16) ||
2919 (vd.Is2D() && vn.Is4S() && shift == 32));
2920 Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
2921 }
2922
2923 void Assembler::NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
2924 NEON2RegMiscOp vop, double value) {
2925 DCHECK(AreSameFormat(vd, vn));
2926 DCHECK_EQ(value, 0.0);
2927 USE(value);
2928
2929 Instr op = vop;
2930 if (vd.IsScalar()) {
2931 DCHECK(vd.Is1S() || vd.Is1D());
2932 op |= NEON_Q | NEONScalar;
2933 } else {
2934 DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S());
2935 }
2936
2937 Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
2938 }
2939
2940 void Assembler::fcmeq(const VRegister& vd, const VRegister& vn, double value) {
2941 NEONFP2RegMisc(vd, vn, NEON_FCMEQ_zero, value);
2942 }
2943
2944 void Assembler::fcmge(const VRegister& vd, const VRegister& vn, double value) {
2945 NEONFP2RegMisc(vd, vn, NEON_FCMGE_zero, value);
2946 }
2947
2948 void Assembler::fcmgt(const VRegister& vd, const VRegister& vn, double value) {
2949 NEONFP2RegMisc(vd, vn, NEON_FCMGT_zero, value);
2950 }
2951
2952 void Assembler::fcmle(const VRegister& vd, const VRegister& vn, double value) {
2953 NEONFP2RegMisc(vd, vn, NEON_FCMLE_zero, value);
2954 }
2955
2956 void Assembler::fcmlt(const VRegister& vd, const VRegister& vn, double value) {
2957 NEONFP2RegMisc(vd, vn, NEON_FCMLT_zero, value);
2958 }
2959
2960 void Assembler::frecpx(const VRegister& vd, const VRegister& vn) {
2961 DCHECK(vd.IsScalar());
2962 DCHECK(AreSameFormat(vd, vn));
2963 DCHECK(vd.Is1S() || vd.Is1D());
2964 Emit(FPFormat(vd) | NEON_FRECPX_scalar | Rn(vn) | Rd(vd));
2965 }
2966
2967 void Assembler::fcvtzs(const Register& rd, const VRegister& vn, int fbits) {
2968 DCHECK(vn.Is1S() || vn.Is1D());
2969 DCHECK((fbits >= 0) && (fbits <= rd.SizeInBits()));
2970 if (fbits == 0) {
2971 Emit(SF(rd) | FPType(vn) | FCVTZS | Rn(vn) | Rd(rd));
2972 } else {
2973 Emit(SF(rd) | FPType(vn) | FCVTZS_fixed | FPScale(64 - fbits) | Rn(vn) |
2974 Rd(rd));
2975 }
2976 }
2977
2978 void Assembler::fcvtzs(const VRegister& vd, const VRegister& vn, int fbits) {
2979 DCHECK_GE(fbits, 0);
2980 if (fbits == 0) {
2981 NEONFP2RegMisc(vd, vn, NEON_FCVTZS);
2982 } else {
2983 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
2984 NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZS_imm);
2985 }
2986 }
2987
2988 void Assembler::fcvtzu(const Register& rd, const VRegister& vn, int fbits) {
2989 DCHECK(vn.Is1S() || vn.Is1D());
2990 DCHECK((fbits >= 0) && (fbits <= rd.SizeInBits()));
2991 if (fbits == 0) {
2992 Emit(SF(rd) | FPType(vn) | FCVTZU | Rn(vn) | Rd(rd));
2993 } else {
2994 Emit(SF(rd) | FPType(vn) | FCVTZU_fixed | FPScale(64 - fbits) | Rn(vn) |
2995 Rd(rd));
2996 }
2997 }
2998
2999 void Assembler::fcvtzu(const VRegister& vd, const VRegister& vn, int fbits) {
3000 DCHECK_GE(fbits, 0);
3001 if (fbits == 0) {
3002 NEONFP2RegMisc(vd, vn, NEON_FCVTZU);
3003 } else {
3004 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
3005 NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZU_imm);
3006 }
3007 }
3008
3009 void Assembler::NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
3010 Instr op) {
3011 DCHECK(AreSameFormat(vd, vn));
3012 Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
3013 }
3014
3015 void Assembler::NEON2RegMisc(const VRegister& vd, const VRegister& vn,
3016 NEON2RegMiscOp vop, int value) {
3017 DCHECK(AreSameFormat(vd, vn));
3018 DCHECK_EQ(value, 0);
3019 USE(value);
3020
3021 Instr format, op = vop;
3022 if (vd.IsScalar()) {
3023 op |= NEON_Q | NEONScalar;
3024 format = SFormat(vd);
3025 } else {
3026 format = VFormat(vd);
3027 }
3028
3029 Emit(format | op | Rn(vn) | Rd(vd));
3030 }
3031
3032 void Assembler::cmeq(const VRegister& vd, const VRegister& vn, int value) {
3033 DCHECK(vd.IsVector() || vd.Is1D());
3034 NEON2RegMisc(vd, vn, NEON_CMEQ_zero, value);
3035 }
3036
3037 void Assembler::cmge(const VRegister& vd, const VRegister& vn, int value) {
3038 DCHECK(vd.IsVector() || vd.Is1D());
3039 NEON2RegMisc(vd, vn, NEON_CMGE_zero, value);
3040 }
3041
3042 void Assembler::cmgt(const VRegister& vd, const VRegister& vn, int value) {
3043 DCHECK(vd.IsVector() || vd.Is1D());
3044 NEON2RegMisc(vd, vn, NEON_CMGT_zero, value);
3045 }
3046
3047 void Assembler::cmle(const VRegister& vd, const VRegister& vn, int value) {
3048 DCHECK(vd.IsVector() || vd.Is1D());
3049 NEON2RegMisc(vd, vn, NEON_CMLE_zero, value);
3050 }
3051
3052 void Assembler::cmlt(const VRegister& vd, const VRegister& vn, int value) {
3053 DCHECK(vd.IsVector() || vd.Is1D());
3054 NEON2RegMisc(vd, vn, NEON_CMLT_zero, value);
3055 }
3056
3057 #define NEON_3SAME_LIST(V) \
3058 V(add, NEON_ADD, vd.IsVector() || vd.Is1D()) \
3059 V(addp, NEON_ADDP, vd.IsVector() || vd.Is1D()) \
3060 V(sub, NEON_SUB, vd.IsVector() || vd.Is1D()) \
3061 V(cmeq, NEON_CMEQ, vd.IsVector() || vd.Is1D()) \
3062 V(cmge, NEON_CMGE, vd.IsVector() || vd.Is1D()) \
3063 V(cmgt, NEON_CMGT, vd.IsVector() || vd.Is1D()) \
3064 V(cmhi, NEON_CMHI, vd.IsVector() || vd.Is1D()) \
3065 V(cmhs, NEON_CMHS, vd.IsVector() || vd.Is1D()) \
3066 V(cmtst, NEON_CMTST, vd.IsVector() || vd.Is1D()) \
3067 V(sshl, NEON_SSHL, vd.IsVector() || vd.Is1D()) \
3068 V(ushl, NEON_USHL, vd.IsVector() || vd.Is1D()) \
3069 V(srshl, NEON_SRSHL, vd.IsVector() || vd.Is1D()) \
3070 V(urshl, NEON_URSHL, vd.IsVector() || vd.Is1D()) \
3071 V(sqdmulh, NEON_SQDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
3072 V(sqrdmulh, NEON_SQRDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
3073 V(shadd, NEON_SHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
3074 V(uhadd, NEON_UHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
3075 V(srhadd, NEON_SRHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
3076 V(urhadd, NEON_URHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
3077 V(shsub, NEON_SHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
3078 V(uhsub, NEON_UHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
3079 V(smax, NEON_SMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
3080 V(smaxp, NEON_SMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
3081 V(smin, NEON_SMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
3082 V(sminp, NEON_SMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
3083 V(umax, NEON_UMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
3084 V(umaxp, NEON_UMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
3085 V(umin, NEON_UMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
3086 V(uminp, NEON_UMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
3087 V(saba, NEON_SABA, vd.IsVector() && !vd.IsLaneSizeD()) \
3088 V(sabd, NEON_SABD, vd.IsVector() && !vd.IsLaneSizeD()) \
3089 V(uaba, NEON_UABA, vd.IsVector() && !vd.IsLaneSizeD()) \
3090 V(uabd, NEON_UABD, vd.IsVector() && !vd.IsLaneSizeD()) \
3091 V(mla, NEON_MLA, vd.IsVector() && !vd.IsLaneSizeD()) \
3092 V(mls, NEON_MLS, vd.IsVector() && !vd.IsLaneSizeD()) \
3093 V(mul, NEON_MUL, vd.IsVector() && !vd.IsLaneSizeD()) \
3094 V(and_, NEON_AND, vd.Is8B() || vd.Is16B()) \
3095 V(orr, NEON_ORR, vd.Is8B() || vd.Is16B()) \
3096 V(orn, NEON_ORN, vd.Is8B() || vd.Is16B()) \
3097 V(eor, NEON_EOR, vd.Is8B() || vd.Is16B()) \
3098 V(bic, NEON_BIC, vd.Is8B() || vd.Is16B()) \
3099 V(bit, NEON_BIT, vd.Is8B() || vd.Is16B()) \
3100 V(bif, NEON_BIF, vd.Is8B() || vd.Is16B()) \
3101 V(bsl, NEON_BSL, vd.Is8B() || vd.Is16B()) \
3102 V(pmul, NEON_PMUL, vd.Is8B() || vd.Is16B()) \
3103 V(uqadd, NEON_UQADD, true) \
3104 V(sqadd, NEON_SQADD, true) \
3105 V(uqsub, NEON_UQSUB, true) \
3106 V(sqsub, NEON_SQSUB, true) \
3107 V(sqshl, NEON_SQSHL, true) \
3108 V(uqshl, NEON_UQSHL, true) \
3109 V(sqrshl, NEON_SQRSHL, true) \
3110 V(uqrshl, NEON_UQRSHL, true)
3111
3112 #define DEFINE_ASM_FUNC(FN, OP, AS) \
3113 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3114 const VRegister& vm) { \
3115 DCHECK(AS); \
3116 NEON3Same(vd, vn, vm, OP); \
3117 }
3118 NEON_3SAME_LIST(DEFINE_ASM_FUNC)
3119 #undef DEFINE_ASM_FUNC
3120
3121 #define NEON_FP3SAME_LIST_V2(V) \
3122 V(fadd, NEON_FADD, FADD) \
3123 V(fsub, NEON_FSUB, FSUB) \
3124 V(fmul, NEON_FMUL, FMUL) \
3125 V(fdiv, NEON_FDIV, FDIV) \
3126 V(fmax, NEON_FMAX, FMAX) \
3127 V(fmaxnm, NEON_FMAXNM, FMAXNM) \
3128 V(fmin, NEON_FMIN, FMIN) \
3129 V(fminnm, NEON_FMINNM, FMINNM) \
3130 V(fmulx, NEON_FMULX, NEON_FMULX_scalar) \
3131 V(frecps, NEON_FRECPS, NEON_FRECPS_scalar) \
3132 V(frsqrts, NEON_FRSQRTS, NEON_FRSQRTS_scalar) \
3133 V(fabd, NEON_FABD, NEON_FABD_scalar) \
3134 V(fmla, NEON_FMLA, 0) \
3135 V(fmls, NEON_FMLS, 0) \
3136 V(facge, NEON_FACGE, NEON_FACGE_scalar) \
3137 V(facgt, NEON_FACGT, NEON_FACGT_scalar) \
3138 V(fcmeq, NEON_FCMEQ, NEON_FCMEQ_scalar) \
3139 V(fcmge, NEON_FCMGE, NEON_FCMGE_scalar) \
3140 V(fcmgt, NEON_FCMGT, NEON_FCMGT_scalar) \
3141 V(faddp, NEON_FADDP, 0) \
3142 V(fmaxp, NEON_FMAXP, 0) \
3143 V(fminp, NEON_FMINP, 0) \
3144 V(fmaxnmp, NEON_FMAXNMP, 0) \
3145 V(fminnmp, NEON_FMINNMP, 0)
3146
3147 #define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
3148 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3149 const VRegister& vm) { \
3150 Instr op; \
3151 if ((SCA_OP != 0) && vd.IsScalar()) { \
3152 DCHECK(vd.Is1S() || vd.Is1D()); \
3153 op = SCA_OP; \
3154 } else { \
3155 DCHECK(vd.IsVector()); \
3156 DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
3157 op = VEC_OP; \
3158 } \
3159 NEONFP3Same(vd, vn, vm, op); \
3160 }
3161 NEON_FP3SAME_LIST_V2(DEFINE_ASM_FUNC)
3162 #undef DEFINE_ASM_FUNC
3163
3164 void Assembler::addp(const VRegister& vd, const VRegister& vn) {
3165 DCHECK((vd.Is1D() && vn.Is2D()));
3166 Emit(SFormat(vd) | NEON_ADDP_scalar | Rn(vn) | Rd(vd));
3167 }
3168
3169 void Assembler::faddp(const VRegister& vd, const VRegister& vn) {
3170 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3171 Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd));
3172 }
3173
3174 void Assembler::fmaxp(const VRegister& vd, const VRegister& vn) {
3175 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3176 Emit(FPFormat(vd) | NEON_FMAXP_scalar | Rn(vn) | Rd(vd));
3177 }
3178
3179 void Assembler::fminp(const VRegister& vd, const VRegister& vn) {
3180 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3181 Emit(FPFormat(vd) | NEON_FMINP_scalar | Rn(vn) | Rd(vd));
3182 }
3183
3184 void Assembler::fmaxnmp(const VRegister& vd, const VRegister& vn) {
3185 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3186 Emit(FPFormat(vd) | NEON_FMAXNMP_scalar | Rn(vn) | Rd(vd));
3187 }
3188
3189 void Assembler::fminnmp(const VRegister& vd, const VRegister& vn) {
3190 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3191 Emit(FPFormat(vd) | NEON_FMINNMP_scalar | Rn(vn) | Rd(vd));
3192 }
3193
3194 void Assembler::orr(const VRegister& vd, const int imm8, const int left_shift) {
3195 NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_ORR);
3196 }
3197
3198 void Assembler::mov(const VRegister& vd, const VRegister& vn) {
3199 DCHECK(AreSameFormat(vd, vn));
3200 if (vd.IsD()) {
3201 orr(vd.V8B(), vn.V8B(), vn.V8B());
3202 } else {
3203 DCHECK(vd.IsQ());
3204 orr(vd.V16B(), vn.V16B(), vn.V16B());
3205 }
3206 }
3207
3208 void Assembler::bic(const VRegister& vd, const int imm8, const int left_shift) {
3209 NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_BIC);
3210 }
3211
3212 void Assembler::movi(const VRegister& vd, const uint64_t imm, Shift shift,
3213 const int shift_amount) {
3214 DCHECK((shift == LSL) || (shift == MSL));
3215 if (vd.Is2D() || vd.Is1D()) {
3216 DCHECK_EQ(shift_amount, 0);
3217 int imm8 = 0;
3218 for (int i = 0; i < 8; ++i) {
3219 int byte = (imm >> (i * 8)) & 0xFF;
3220 DCHECK((byte == 0) || (byte == 0xFF));
3221 if (byte == 0xFF) {
3222 imm8 |= (1 << i);
3223 }
3224 }
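    // For example, imm == 0x00FF00FF00FF00FF collapses to imm8 == 0x55: one
    // bit per all-ones byte.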
3225 Instr q = vd.Is2D() ? NEON_Q : 0;
3226 Emit(q | NEONModImmOp(1) | NEONModifiedImmediate_MOVI |
3227 ImmNEONabcdefgh(imm8) | NEONCmode(0xE) | Rd(vd));
3228 } else if (shift == LSL) {
3229 DCHECK(is_uint8(imm));
3230 NEONModifiedImmShiftLsl(vd, static_cast<int>(imm), shift_amount,
3231 NEONModifiedImmediate_MOVI);
3232 } else {
3233 DCHECK(is_uint8(imm));
3234 NEONModifiedImmShiftMsl(vd, static_cast<int>(imm), shift_amount,
3235 NEONModifiedImmediate_MOVI);
3236 }
3237 }
3238
3239 void Assembler::mvn(const VRegister& vd, const VRegister& vn) {
3240 DCHECK(AreSameFormat(vd, vn));
3241 if (vd.IsD()) {
3242 not_(vd.V8B(), vn.V8B());
3243 } else {
3244 DCHECK(vd.IsQ());
3245 not_(vd.V16B(), vn.V16B());
3246 }
3247 }
3248
3249 void Assembler::mvni(const VRegister& vd, const int imm8, Shift shift,
3250 const int shift_amount) {
3251 DCHECK((shift == LSL) || (shift == MSL));
3252 if (shift == LSL) {
3253 NEONModifiedImmShiftLsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI);
3254 } else {
3255 NEONModifiedImmShiftMsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI);
3256 }
3257 }
3258
3259 void Assembler::NEONFPByElement(const VRegister& vd, const VRegister& vn,
3260 const VRegister& vm, int vm_index,
3261 NEONByIndexedElementOp vop) {
3262 DCHECK(AreSameFormat(vd, vn));
3263 DCHECK((vd.Is2S() && vm.Is1S()) || (vd.Is4S() && vm.Is1S()) ||
3264 (vd.Is1S() && vm.Is1S()) || (vd.Is2D() && vm.Is1D()) ||
3265 (vd.Is1D() && vm.Is1D()));
3266 DCHECK((vm.Is1S() && (vm_index < 4)) || (vm.Is1D() && (vm_index < 2)));
3267
3268 Instr op = vop;
3269 int index_num_bits = vm.Is1S() ? 2 : 1;
3270 if (vd.IsScalar()) {
3271 op |= NEON_Q | NEONScalar;
3272 }
3273
3274 Emit(FPFormat(vd) | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) |
3275 Rn(vn) | Rd(vd));
3276 }
3277
3278 void Assembler::NEONByElement(const VRegister& vd, const VRegister& vn,
3279 const VRegister& vm, int vm_index,
3280 NEONByIndexedElementOp vop) {
3281 DCHECK(AreSameFormat(vd, vn));
3282 DCHECK((vd.Is4H() && vm.Is1H()) || (vd.Is8H() && vm.Is1H()) ||
3283 (vd.Is1H() && vm.Is1H()) || (vd.Is2S() && vm.Is1S()) ||
3284 (vd.Is4S() && vm.Is1S()) || (vd.Is1S() && vm.Is1S()));
3285 DCHECK((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
3286 (vm.Is1S() && (vm_index < 4)));
3287
3288 Instr format, op = vop;
3289 int index_num_bits = vm.Is1H() ? 3 : 2;
3290 if (vd.IsScalar()) {
3291 op |= NEONScalar | NEON_Q;
3292 format = SFormat(vn);
3293 } else {
3294 format = VFormat(vn);
3295 }
3296 Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
3297 Rd(vd));
3298 }
3299
3300 void Assembler::NEONByElementL(const VRegister& vd, const VRegister& vn,
3301 const VRegister& vm, int vm_index,
3302 NEONByIndexedElementOp vop) {
3303 DCHECK((vd.Is4S() && vn.Is4H() && vm.Is1H()) ||
3304 (vd.Is4S() && vn.Is8H() && vm.Is1H()) ||
3305 (vd.Is1S() && vn.Is1H() && vm.Is1H()) ||
3306 (vd.Is2D() && vn.Is2S() && vm.Is1S()) ||
3307 (vd.Is2D() && vn.Is4S() && vm.Is1S()) ||
3308 (vd.Is1D() && vn.Is1S() && vm.Is1S()));
3309
3310 DCHECK((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
3311 (vm.Is1S() && (vm_index < 4)));
3312
3313 Instr format, op = vop;
3314 int index_num_bits = vm.Is1H() ? 3 : 2;
3315 if (vd.IsScalar()) {
3316 op |= NEONScalar | NEON_Q;
3317 format = SFormat(vn);
3318 } else {
3319 format = VFormat(vn);
3320 }
3321 Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
3322 Rd(vd));
3323 }
3324
3325 #define NEON_BYELEMENT_LIST(V) \
3326 V(mul, NEON_MUL_byelement, vn.IsVector()) \
3327 V(mla, NEON_MLA_byelement, vn.IsVector()) \
3328 V(mls, NEON_MLS_byelement, vn.IsVector()) \
3329 V(sqdmulh, NEON_SQDMULH_byelement, true) \
3330 V(sqrdmulh, NEON_SQRDMULH_byelement, true)
3331
3332 #define DEFINE_ASM_FUNC(FN, OP, AS) \
3333 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3334 const VRegister& vm, int vm_index) { \
3335 DCHECK(AS); \
3336 NEONByElement(vd, vn, vm, vm_index, OP); \
3337 }
3338 NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
3339 #undef DEFINE_ASM_FUNC
3340
3341 #define NEON_FPBYELEMENT_LIST(V) \
3342 V(fmul, NEON_FMUL_byelement) \
3343 V(fmla, NEON_FMLA_byelement) \
3344 V(fmls, NEON_FMLS_byelement) \
3345 V(fmulx, NEON_FMULX_byelement)
3346
3347 #define DEFINE_ASM_FUNC(FN, OP) \
3348 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3349 const VRegister& vm, int vm_index) { \
3350 NEONFPByElement(vd, vn, vm, vm_index, OP); \
3351 }
3352 NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
3353 #undef DEFINE_ASM_FUNC
3354
3355 #define NEON_BYELEMENT_LONG_LIST(V) \
3356 V(sqdmull, NEON_SQDMULL_byelement, vn.IsScalar() || vn.IsD()) \
3357 V(sqdmull2, NEON_SQDMULL_byelement, vn.IsVector() && vn.IsQ()) \
3358 V(sqdmlal, NEON_SQDMLAL_byelement, vn.IsScalar() || vn.IsD()) \
3359 V(sqdmlal2, NEON_SQDMLAL_byelement, vn.IsVector() && vn.IsQ()) \
3360 V(sqdmlsl, NEON_SQDMLSL_byelement, vn.IsScalar() || vn.IsD()) \
3361 V(sqdmlsl2, NEON_SQDMLSL_byelement, vn.IsVector() && vn.IsQ()) \
3362 V(smull, NEON_SMULL_byelement, vn.IsVector() && vn.IsD()) \
3363 V(smull2, NEON_SMULL_byelement, vn.IsVector() && vn.IsQ()) \
3364 V(umull, NEON_UMULL_byelement, vn.IsVector() && vn.IsD()) \
3365 V(umull2, NEON_UMULL_byelement, vn.IsVector() && vn.IsQ()) \
3366 V(smlal, NEON_SMLAL_byelement, vn.IsVector() && vn.IsD()) \
3367 V(smlal2, NEON_SMLAL_byelement, vn.IsVector() && vn.IsQ()) \
3368 V(umlal, NEON_UMLAL_byelement, vn.IsVector() && vn.IsD()) \
3369 V(umlal2, NEON_UMLAL_byelement, vn.IsVector() && vn.IsQ()) \
3370 V(smlsl, NEON_SMLSL_byelement, vn.IsVector() && vn.IsD()) \
3371 V(smlsl2, NEON_SMLSL_byelement, vn.IsVector() && vn.IsQ()) \
3372 V(umlsl, NEON_UMLSL_byelement, vn.IsVector() && vn.IsD()) \
3373 V(umlsl2, NEON_UMLSL_byelement, vn.IsVector() && vn.IsQ())
3374
3375 #define DEFINE_ASM_FUNC(FN, OP, AS) \
3376 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3377 const VRegister& vm, int vm_index) { \
3378 DCHECK(AS); \
3379 NEONByElementL(vd, vn, vm, vm_index, OP); \
3380 }
3381 NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
3382 #undef DEFINE_ASM_FUNC
3383
3384 void Assembler::suqadd(const VRegister& vd, const VRegister& vn) {
3385 NEON2RegMisc(vd, vn, NEON_SUQADD);
3386 }
3387
3388 void Assembler::usqadd(const VRegister& vd, const VRegister& vn) {
3389 NEON2RegMisc(vd, vn, NEON_USQADD);
3390 }
3391
3392 void Assembler::abs(const VRegister& vd, const VRegister& vn) {
3393 DCHECK(vd.IsVector() || vd.Is1D());
3394 NEON2RegMisc(vd, vn, NEON_ABS);
3395 }
3396
3397 void Assembler::sqabs(const VRegister& vd, const VRegister& vn) {
3398 NEON2RegMisc(vd, vn, NEON_SQABS);
3399 }
3400
3401 void Assembler::neg(const VRegister& vd, const VRegister& vn) {
3402 DCHECK(vd.IsVector() || vd.Is1D());
3403 NEON2RegMisc(vd, vn, NEON_NEG);
3404 }
3405
3406 void Assembler::sqneg(const VRegister& vd, const VRegister& vn) {
3407 NEON2RegMisc(vd, vn, NEON_SQNEG);
3408 }
3409
3410 void Assembler::NEONXtn(const VRegister& vd, const VRegister& vn,
3411 NEON2RegMiscOp vop) {
3412 Instr format, op = vop;
3413 if (vd.IsScalar()) {
3414 DCHECK((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) ||
3415 (vd.Is1S() && vn.Is1D()));
3416 op |= NEON_Q | NEONScalar;
3417 format = SFormat(vd);
3418 } else {
3419 DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
3420 (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
3421 (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
3422 format = VFormat(vd);
3423 }
3424 Emit(format | op | Rn(vn) | Rd(vd));
3425 }
3426
3427 void Assembler::xtn(const VRegister& vd, const VRegister& vn) {
3428 DCHECK(vd.IsVector() && vd.IsD());
3429 NEONXtn(vd, vn, NEON_XTN);
3430 }
3431
3432 void Assembler::xtn2(const VRegister& vd, const VRegister& vn) {
3433 DCHECK(vd.IsVector() && vd.IsQ());
3434 NEONXtn(vd, vn, NEON_XTN);
3435 }
3436
3437 void Assembler::sqxtn(const VRegister& vd, const VRegister& vn) {
3438 DCHECK(vd.IsScalar() || vd.IsD());
3439 NEONXtn(vd, vn, NEON_SQXTN);
3440 }
3441
3442 void Assembler::sqxtn2(const VRegister& vd, const VRegister& vn) {
3443 DCHECK(vd.IsVector() && vd.IsQ());
3444 NEONXtn(vd, vn, NEON_SQXTN);
3445 }
3446
3447 void Assembler::sqxtun(const VRegister& vd, const VRegister& vn) {
3448 DCHECK(vd.IsScalar() || vd.IsD());
3449 NEONXtn(vd, vn, NEON_SQXTUN);
3450 }
3451
3452 void Assembler::sqxtun2(const VRegister& vd, const VRegister& vn) {
3453 DCHECK(vd.IsVector() && vd.IsQ());
3454 NEONXtn(vd, vn, NEON_SQXTUN);
3455 }
3456
3457 void Assembler::uqxtn(const VRegister& vd, const VRegister& vn) {
3458 DCHECK(vd.IsScalar() || vd.IsD());
3459 NEONXtn(vd, vn, NEON_UQXTN);
3460 }
3461
3462 void Assembler::uqxtn2(const VRegister& vd, const VRegister& vn) {
3463 DCHECK(vd.IsVector() && vd.IsQ());
3464 NEONXtn(vd, vn, NEON_UQXTN);
3465 }
3466
3467 // NEON NOT and RBIT are distinguished by bit 22, the bottom bit of "size".
3468 void Assembler::not_(const VRegister& vd, const VRegister& vn) {
3469 DCHECK(AreSameFormat(vd, vn));
3470 DCHECK(vd.Is8B() || vd.Is16B());
3471 Emit(VFormat(vd) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
3472 }
3473
3474 void Assembler::rbit(const VRegister& vd, const VRegister& vn) {
3475 DCHECK(AreSameFormat(vd, vn));
3476 DCHECK(vd.Is8B() || vd.Is16B());
3477 Emit(VFormat(vn) | (1 << NEONSize_offset) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
3478 }
3479
3480 void Assembler::ext(const VRegister& vd, const VRegister& vn,
3481 const VRegister& vm, int index) {
3482 DCHECK(AreSameFormat(vd, vn, vm));
3483 DCHECK(vd.Is8B() || vd.Is16B());
3484 DCHECK((0 <= index) && (index < vd.LaneCount()));
3485 Emit(VFormat(vd) | NEON_EXT | Rm(vm) | ImmNEONExt(index) | Rn(vn) | Rd(vd));
3486 }
3487
3488 void Assembler::dup(const VRegister& vd, const VRegister& vn, int vn_index) {
3489 Instr q, scalar;
3490
3491 // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
3492 // number of lanes, and T is b, h, s or d.
3493 int lane_size = vn.LaneSizeInBytes();
3494 NEONFormatField format;
3495 switch (lane_size) {
3496 case 1:
3497 format = NEON_16B;
3498 break;
3499 case 2:
3500 format = NEON_8H;
3501 break;
3502 case 4:
3503 format = NEON_4S;
3504 break;
3505 default:
3506 DCHECK_EQ(lane_size, 8);
3507 format = NEON_2D;
3508 break;
3509 }
3510
3511 if (vd.IsScalar()) {
3512 q = NEON_Q;
3513 scalar = NEONScalar;
3514 } else {
3515 DCHECK(!vd.Is1D());
3516 q = vd.IsD() ? 0 : NEON_Q;
3517 scalar = 0;
3518 }
3519 Emit(q | scalar | NEON_DUP_ELEMENT | ImmNEON5(format, vn_index) | Rn(vn) |
3520 Rd(vd));
3521 }
3522
3523 void Assembler::dcptr(Label* label) {
3524 BlockPoolsScope no_pool_inbetween(this);
3525 RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
3526 if (label->is_bound()) {
3527 // The label is bound, so it does not need to be updated and the internal
3528 // reference should be emitted.
3529 //
3530 // In this case, label->pos() returns the offset of the label from the
3531 // start of the buffer.
3532 internal_reference_positions_.push_back(pc_offset());
3533 dc64(reinterpret_cast<uintptr_t>(buffer_start_ + label->pos()));
3534 } else {
3535 int32_t offset;
3536 if (label->is_linked()) {
3537 // The label is linked, so the internal reference should be added
3538 // onto the end of the label's link chain.
3539 //
3540 // In this case, label->pos() returns the offset of the last linked
3541 // instruction from the start of the buffer.
3542 offset = label->pos() - pc_offset();
3543 DCHECK_NE(offset, kStartOfLabelLinkChain);
3544 } else {
3545 // The label is unused, so it now becomes linked and the internal
3546 // reference is at the start of the new link chain.
3547 offset = kStartOfLabelLinkChain;
3548 }
3549 // The instruction at pc is now the last link in the label's chain.
3550 label->link_to(pc_offset());
3551
3552 // Traditionally the offset to the previous instruction in the chain is
3553 // encoded in the instruction payload (e.g. branch range) but internal
3554 // references are not instructions so while unbound they are encoded as
3555 // two consecutive brk instructions. The two 16-bit immediates are used
3556 // to encode the offset.
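    // For instance, if the previous link in the chain is two instructions
    // (8 bytes) before this internal reference, offset becomes -2 after the
    // shift below, and we emit brk(0xFFFF) followed by brk(0xFFFE).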
3557 offset >>= kInstrSizeLog2;
3558 DCHECK(is_int32(offset));
3559 uint32_t high16 = unsigned_bitextract_32(31, 16, offset);
3560 uint32_t low16 = unsigned_bitextract_32(15, 0, offset);
3561
3562 brk(high16);
3563 brk(low16);
3564 }
3565 }
3566
3567 // Below, a difference in case for the same letter indicates a
3568 // negated bit. If b is 1, then B is 0.
3569 uint32_t Assembler::FPToImm8(double imm) {
3570 DCHECK(IsImmFP64(imm));
3571 // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
3572 // 0000.0000.0000.0000.0000.0000.0000.0000
3573 uint64_t bits = bit_cast<uint64_t>(imm);
3574 // bit7: a000.0000
3575 uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
3576 // bit6: 0b00.0000
3577 uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
3578 // bit5_to_0: 00cd.efgh
3579 uint64_t bit5_to_0 = (bits >> 48) & 0x3F;
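  // Worked example: 1.0 has the bit pattern 0x3FF0000000000000, giving
  // bit7 = 0, bit6 = 0x40 and bit5_to_0 = 0x30, i.e. imm8 = 0x70.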
3580
3581 return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
3582 }
3583
3584 Instr Assembler::ImmFP(double imm) { return FPToImm8(imm) << ImmFP_offset; }
3585 Instr Assembler::ImmNEONFP(double imm) {
3586 return ImmNEONabcdefgh(FPToImm8(imm));
3587 }
3588
3589 // Code generation helpers.
3590 void Assembler::MoveWide(const Register& rd, uint64_t imm, int shift,
3591 MoveWideImmediateOp mov_op) {
3592 // Ignore the top 32 bits of an immediate if we're moving to a W register.
3593 if (rd.Is32Bits()) {
3594 // Check that the top 32 bits are zero (a positive 32-bit number) or top
3595 // 33 bits are one (a negative 32-bit number, sign extended to 64 bits).
3596 DCHECK(((imm >> kWRegSizeInBits) == 0) ||
3597 ((imm >> (kWRegSizeInBits - 1)) == 0x1FFFFFFFF));
3598 imm &= kWRegMask;
3599 }
3600
3601 if (shift >= 0) {
3602 // Explicit shift specified.
3603 DCHECK((shift == 0) || (shift == 16) || (shift == 32) || (shift == 48));
3604 DCHECK(rd.Is64Bits() || (shift == 0) || (shift == 16));
3605 shift /= 16;
3606 } else {
3607 // Calculate a new immediate and shift combination to encode the immediate
3608 // argument.
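    // For example, an immediate of 0x5A0000 is encoded below as imm16 = 0x5A
    // with shift = 1 (LSL #16).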
3609 shift = 0;
3610 if ((imm & ~0xFFFFULL) == 0) {
3611 // Nothing to do.
3612 } else if ((imm & ~(0xFFFFULL << 16)) == 0) {
3613 imm >>= 16;
3614 shift = 1;
3615 } else if ((imm & ~(0xFFFFULL << 32)) == 0) {
3616 DCHECK(rd.Is64Bits());
3617 imm >>= 32;
3618 shift = 2;
3619 } else if ((imm & ~(0xFFFFULL << 48)) == 0) {
3620 DCHECK(rd.Is64Bits());
3621 imm >>= 48;
3622 shift = 3;
3623 }
3624 }
3625
3626 DCHECK(is_uint16(imm));
3627
3628 Emit(SF(rd) | MoveWideImmediateFixed | mov_op | Rd(rd) |
3629 ImmMoveWide(static_cast<int>(imm)) | ShiftMoveWide(shift));
3630 }
3631
3632 void Assembler::AddSub(const Register& rd, const Register& rn,
3633 const Operand& operand, FlagsUpdate S, AddSubOp op) {
3634 DCHECK_EQ(rd.SizeInBits(), rn.SizeInBits());
3635 DCHECK(!operand.NeedsRelocation(this));
3636 if (operand.IsImmediate()) {
3637 int64_t immediate = operand.ImmediateValue();
3638 DCHECK(IsImmAddSub(immediate));
3639 Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
3640 Emit(SF(rd) | AddSubImmediateFixed | op | Flags(S) |
3641 ImmAddSub(static_cast<int>(immediate)) | dest_reg | RnSP(rn));
3642 } else if (operand.IsShiftedRegister()) {
3643 DCHECK_EQ(operand.reg().SizeInBits(), rd.SizeInBits());
3644 DCHECK_NE(operand.shift(), ROR);
3645
3646 // For instructions of the form:
3647 // add/sub wsp, <Wn>, <Wm> [, LSL #0-3 ]
3648 // add/sub <Wd>, wsp, <Wm> [, LSL #0-3 ]
3649 // add/sub wsp, wsp, <Wm> [, LSL #0-3 ]
3650 // adds/subs <Wd>, wsp, <Wm> [, LSL #0-3 ]
3651 // or their 64-bit register equivalents, convert the operand from shifted to
3652 // extended register mode, and emit an add/sub extended instruction.
3653 if (rn.IsSP() || rd.IsSP()) {
3654 DCHECK(!(rd.IsSP() && (S == SetFlags)));
3655 DataProcExtendedRegister(rd, rn, operand.ToExtendedRegister(), S,
3656 AddSubExtendedFixed | op);
3657 } else {
3658 DataProcShiftedRegister(rd, rn, operand, S, AddSubShiftedFixed | op);
3659 }
3660 } else {
3661 DCHECK(operand.IsExtendedRegister());
3662 DataProcExtendedRegister(rd, rn, operand, S, AddSubExtendedFixed | op);
3663 }
3664 }
3665
3666 void Assembler::AddSubWithCarry(const Register& rd, const Register& rn,
3667 const Operand& operand, FlagsUpdate S,
3668 AddSubWithCarryOp op) {
3669 DCHECK_EQ(rd.SizeInBits(), rn.SizeInBits());
3670 DCHECK_EQ(rd.SizeInBits(), operand.reg().SizeInBits());
3671 DCHECK(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
3672 DCHECK(!operand.NeedsRelocation(this));
3673 Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) | Rn(rn) | Rd(rd));
3674 }
3675
3676 void Assembler::hlt(int code) {
3677 DCHECK(is_uint16(code));
3678 Emit(HLT | ImmException(code));
3679 }
3680
3681 void Assembler::brk(int code) {
3682 DCHECK(is_uint16(code));
3683 Emit(BRK | ImmException(code));
3684 }
3685
3686 void Assembler::EmitStringData(const char* string) {
3687 size_t len = strlen(string) + 1;
3688 DCHECK_LE(RoundUp(len, kInstrSize), static_cast<size_t>(kGap));
3689 EmitData(string, static_cast<int>(len));
3690   // Pad with NUL characters until pc_ is aligned.
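  // For example, a 6-character message occupies 7 bytes including the
  // terminator, so (starting from an aligned pc) one padding byte is emitted
  // to restore kInstrSize alignment.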
3691 const char pad[] = {'\0', '\0', '\0', '\0'};
3692 static_assert(sizeof(pad) == kInstrSize,
3693 "Size of padding must match instruction size.");
3694 EmitData(pad, RoundUp(pc_offset(), kInstrSize) - pc_offset());
3695 }
3696
3697 void Assembler::debug(const char* message, uint32_t code, Instr params) {
3698 if (options().enable_simulator_code) {
3699 // The arguments to the debug marker need to be contiguous in memory, so
3700 // make sure we don't try to emit pools.
3701 BlockPoolsScope scope(this);
3702
3703 Label start;
3704 bind(&start);
3705
3706 // Refer to instructions-arm64.h for a description of the marker and its
3707 // arguments.
3708 hlt(kImmExceptionIsDebug);
3709 DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugCodeOffset);
3710 dc32(code);
3711 DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugParamsOffset);
3712 dc32(params);
3713 DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugMessageOffset);
3714 EmitStringData(message);
3715 hlt(kImmExceptionIsUnreachable);
3716
3717 return;
3718 }
3719
3720 if (params & BREAK) {
3721 brk(0);
3722 }
3723 }
3724
3725 void Assembler::Logical(const Register& rd, const Register& rn,
3726 const Operand& operand, LogicalOp op) {
3727 DCHECK(rd.SizeInBits() == rn.SizeInBits());
3728 DCHECK(!operand.NeedsRelocation(this));
3729 if (operand.IsImmediate()) {
3730 int64_t immediate = operand.ImmediateValue();
3731 unsigned reg_size = rd.SizeInBits();
3732
3733 DCHECK_NE(immediate, 0);
3734 DCHECK_NE(immediate, -1);
3735 DCHECK(rd.Is64Bits() || is_uint32(immediate));
3736
3737 // If the operation is NOT, invert the operation and immediate.
3738 if ((op & NOT) == NOT) {
3739 op = static_cast<LogicalOp>(op & ~NOT);
3740 immediate = rd.Is64Bits() ? ~immediate : (~immediate & kWRegMask);
3741 }
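    // For example, a BIC of w1 with immediate 0x1 reaches this point as an
    // AND of w1 with 0xFFFFFFFE, which is then encoded below.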
3742
3743 unsigned n, imm_s, imm_r;
3744 if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
3745 // Immediate can be encoded in the instruction.
3746 LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
3747 } else {
3748 // This case is handled in the macro assembler.
3749 UNREACHABLE();
3750 }
3751 } else {
3752 DCHECK(operand.IsShiftedRegister());
3753 DCHECK(operand.reg().SizeInBits() == rd.SizeInBits());
3754 Instr dp_op = static_cast<Instr>(op | LogicalShiftedFixed);
3755 DataProcShiftedRegister(rd, rn, operand, LeaveFlags, dp_op);
3756 }
3757 }
3758
3759 void Assembler::LogicalImmediate(const Register& rd, const Register& rn,
3760 unsigned n, unsigned imm_s, unsigned imm_r,
3761 LogicalOp op) {
3762 unsigned reg_size = rd.SizeInBits();
3763 Instr dest_reg = (op == ANDS) ? Rd(rd) : RdSP(rd);
3764 Emit(SF(rd) | LogicalImmediateFixed | op | BitN(n, reg_size) |
3765 ImmSetBits(imm_s, reg_size) | ImmRotate(imm_r, reg_size) | dest_reg |
3766 Rn(rn));
3767 }
3768
3769 void Assembler::ConditionalCompare(const Register& rn, const Operand& operand,
3770 StatusFlags nzcv, Condition cond,
3771 ConditionalCompareOp op) {
3772 Instr ccmpop;
3773 DCHECK(!operand.NeedsRelocation(this));
3774 if (operand.IsImmediate()) {
3775 int64_t immediate = operand.ImmediateValue();
3776 DCHECK(IsImmConditionalCompare(immediate));
3777 ccmpop = ConditionalCompareImmediateFixed | op |
3778 ImmCondCmp(static_cast<unsigned>(immediate));
3779 } else {
3780 DCHECK(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
3781 ccmpop = ConditionalCompareRegisterFixed | op | Rm(operand.reg());
3782 }
3783 Emit(SF(rn) | ccmpop | Cond(cond) | Rn(rn) | Nzcv(nzcv));
3784 }
3785
3786 void Assembler::DataProcessing1Source(const Register& rd, const Register& rn,
3787 DataProcessing1SourceOp op) {
3788 DCHECK(rd.SizeInBits() == rn.SizeInBits());
3789 Emit(SF(rn) | op | Rn(rn) | Rd(rd));
3790 }
3791
3792 void Assembler::FPDataProcessing1Source(const VRegister& vd,
3793 const VRegister& vn,
3794 FPDataProcessing1SourceOp op) {
3795 Emit(FPType(vn) | op | Rn(vn) | Rd(vd));
3796 }
3797
3798 void Assembler::FPDataProcessing2Source(const VRegister& fd,
3799 const VRegister& fn,
3800 const VRegister& fm,
3801 FPDataProcessing2SourceOp op) {
3802 DCHECK(fd.SizeInBits() == fn.SizeInBits());
3803 DCHECK(fd.SizeInBits() == fm.SizeInBits());
3804 Emit(FPType(fd) | op | Rm(fm) | Rn(fn) | Rd(fd));
3805 }
3806
3807 void Assembler::FPDataProcessing3Source(const VRegister& fd,
3808 const VRegister& fn,
3809 const VRegister& fm,
3810 const VRegister& fa,
3811 FPDataProcessing3SourceOp op) {
3812 DCHECK(AreSameSizeAndType(fd, fn, fm, fa));
3813 Emit(FPType(fd) | op | Rm(fm) | Rn(fn) | Rd(fd) | Ra(fa));
3814 }
3815
3816 void Assembler::NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8,
3817 const int left_shift,
3818 NEONModifiedImmediateOp op) {
3819 DCHECK(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H() || vd.Is2S() ||
3820 vd.Is4S());
3821 DCHECK((left_shift == 0) || (left_shift == 8) || (left_shift == 16) ||
3822 (left_shift == 24));
3823 DCHECK(is_uint8(imm8));
3824
3825 int cmode_1, cmode_2, cmode_3;
3826 if (vd.Is8B() || vd.Is16B()) {
3827 DCHECK_EQ(op, NEONModifiedImmediate_MOVI);
3828 cmode_1 = 1;
3829 cmode_2 = 1;
3830 cmode_3 = 1;
3831 } else {
3832 cmode_1 = (left_shift >> 3) & 1;
3833 cmode_2 = left_shift >> 4;
3834 cmode_3 = 0;
3835 if (vd.Is4H() || vd.Is8H()) {
3836 DCHECK((left_shift == 0) || (left_shift == 8));
3837 cmode_3 = 1;
3838 }
3839 }
3840 int cmode = (cmode_3 << 3) | (cmode_2 << 2) | (cmode_1 << 1);
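  // For example, a 4S destination with left_shift == 16 gives cmode_1 = 0,
  // cmode_2 = 1 and cmode_3 = 0, i.e. cmode = 0b0100.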
3841
3842 Instr q = vd.IsQ() ? NEON_Q : 0;
3843
3844 Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
3845 }
3846
3847 void Assembler::NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8,
3848 const int shift_amount,
3849 NEONModifiedImmediateOp op) {
3850 DCHECK(vd.Is2S() || vd.Is4S());
3851 DCHECK((shift_amount == 8) || (shift_amount == 16));
3852 DCHECK(is_uint8(imm8));
3853
3854 int cmode_0 = (shift_amount >> 4) & 1;
3855 int cmode = 0xC | cmode_0;
3856
3857 Instr q = vd.IsQ() ? NEON_Q : 0;
3858
3859 Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
3860 }
3861
3862 void Assembler::EmitShift(const Register& rd, const Register& rn, Shift shift,
3863 unsigned shift_amount) {
3864 switch (shift) {
3865 case LSL:
3866 lsl(rd, rn, shift_amount);
3867 break;
3868 case LSR:
3869 lsr(rd, rn, shift_amount);
3870 break;
3871 case ASR:
3872 asr(rd, rn, shift_amount);
3873 break;
3874 case ROR:
3875 ror(rd, rn, shift_amount);
3876 break;
3877 default:
3878 UNREACHABLE();
3879 }
3880 }
3881
3882 void Assembler::EmitExtendShift(const Register& rd, const Register& rn,
3883 Extend extend, unsigned left_shift) {
3884 DCHECK(rd.SizeInBits() >= rn.SizeInBits());
3885 unsigned reg_size = rd.SizeInBits();
3886 // Use the correct size of register.
3887 Register rn_ = Register::Create(rn.code(), rd.SizeInBits());
3888 // Bits extracted are high_bit:0.
3889 unsigned high_bit = (8 << (extend & 0x3)) - 1;
3890 // Number of bits left in the result that are not introduced by the shift.
3891 unsigned non_shift_bits = (reg_size - left_shift) & (reg_size - 1);
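  // For example, extending a W register with UXTB and a left shift of 3 into
  // a 64-bit destination gives high_bit = 7 and non_shift_bits = 61, so the
  // UXTB case below emits ubfm(rd, rn_, 61, 7): the low byte zero-extended
  // and shifted left by three bits.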
3892
3893 if ((non_shift_bits > high_bit) || (non_shift_bits == 0)) {
3894 switch (extend) {
3895 case UXTB:
3896 case UXTH:
3897 case UXTW:
3898 ubfm(rd, rn_, non_shift_bits, high_bit);
3899 break;
3900 case SXTB:
3901 case SXTH:
3902 case SXTW:
3903 sbfm(rd, rn_, non_shift_bits, high_bit);
3904 break;
3905 case UXTX:
3906 case SXTX: {
3907 DCHECK_EQ(rn.SizeInBits(), kXRegSizeInBits);
3908 // Nothing to extend. Just shift.
3909 lsl(rd, rn_, left_shift);
3910 break;
3911 }
3912 default:
3913 UNREACHABLE();
3914 }
3915 } else {
3916 // No need to extend as the extended bits would be shifted away.
3917 lsl(rd, rn_, left_shift);
3918 }
3919 }
3920
3921 void Assembler::DataProcShiftedRegister(const Register& rd, const Register& rn,
3922 const Operand& operand, FlagsUpdate S,
3923 Instr op) {
3924 DCHECK(operand.IsShiftedRegister());
3925 DCHECK(rn.Is64Bits() || (rn.Is32Bits() && is_uint5(operand.shift_amount())));
3926 DCHECK(!operand.NeedsRelocation(this));
3927 Emit(SF(rd) | op | Flags(S) | ShiftDP(operand.shift()) |
3928 ImmDPShift(operand.shift_amount()) | Rm(operand.reg()) | Rn(rn) |
3929 Rd(rd));
3930 }
3931
3932 void Assembler::DataProcExtendedRegister(const Register& rd, const Register& rn,
3933 const Operand& operand, FlagsUpdate S,
3934 Instr op) {
3935 DCHECK(!operand.NeedsRelocation(this));
3936 Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
3937 Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) |
3938 ExtendMode(operand.extend()) | ImmExtendShift(operand.shift_amount()) |
3939 dest_reg | RnSP(rn));
3940 }
3941
3942 bool Assembler::IsImmAddSub(int64_t immediate) {
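  // An add/sub immediate is an unsigned 12-bit value, optionally shifted left
  // by 12 bits: for example, 0xFFF and 0xFFF000 are encodable, 0xFFF001 is not.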
3943 return is_uint12(immediate) ||
3944 (is_uint12(immediate >> 12) && ((immediate & 0xFFF) == 0));
3945 }
3946
3947 void Assembler::LoadStore(const CPURegister& rt, const MemOperand& addr,
3948 LoadStoreOp op) {
3949 Instr memop = op | Rt(rt) | RnSP(addr.base());
3950
3951 if (addr.IsImmediateOffset()) {
3952 unsigned size = CalcLSDataSize(op);
3953 if (IsImmLSScaled(addr.offset(), size)) {
3954 int offset = static_cast<int>(addr.offset());
3955 // Use the scaled addressing mode.
3956 Emit(LoadStoreUnsignedOffsetFixed | memop |
3957 ImmLSUnsigned(offset >> size));
3958 } else if (IsImmLSUnscaled(addr.offset())) {
3959 int offset = static_cast<int>(addr.offset());
3960 // Use the unscaled addressing mode.
3961 Emit(LoadStoreUnscaledOffsetFixed | memop | ImmLS(offset));
3962 } else {
3963 // This case is handled in the macro assembler.
3964 UNREACHABLE();
3965 }
3966 } else if (addr.IsRegisterOffset()) {
3967 Extend ext = addr.extend();
3968 Shift shift = addr.shift();
3969 unsigned shift_amount = addr.shift_amount();
3970
3971 // LSL is encoded in the option field as UXTX.
3972 if (shift == LSL) {
3973 ext = UXTX;
3974 }
3975
3976 // Shifts are encoded in one bit, indicating a left shift by the memory
3977 // access size.
3978 DCHECK((shift_amount == 0) ||
3979 (shift_amount == static_cast<unsigned>(CalcLSDataSize(op))));
3980 Emit(LoadStoreRegisterOffsetFixed | memop | Rm(addr.regoffset()) |
3981 ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0));
3982 } else {
3983 // Pre-index and post-index modes.
3984 DCHECK_NE(rt, addr.base());
3985 if (IsImmLSUnscaled(addr.offset())) {
3986 int offset = static_cast<int>(addr.offset());
3987 if (addr.IsPreIndex()) {
3988 Emit(LoadStorePreIndexFixed | memop | ImmLS(offset));
3989 } else {
3990 DCHECK(addr.IsPostIndex());
3991 Emit(LoadStorePostIndexFixed | memop | ImmLS(offset));
3992 }
3993 } else {
3994 // This case is handled in the macro assembler.
3995 UNREACHABLE();
3996 }
3997 }
3998 }
3999
4000 bool Assembler::IsImmLSUnscaled(int64_t offset) { return is_int9(offset); }
4001
4002 bool Assembler::IsImmLSScaled(int64_t offset, unsigned size) {
4003 bool offset_is_size_multiple =
4004 (static_cast<int64_t>(static_cast<uint64_t>(offset >> size) << size) ==
4005 offset);
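  // For a 64-bit access (size == 3) this accepts offsets that are multiples
  // of 8 in the range [0, 32760].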
4006 return offset_is_size_multiple && is_uint12(offset >> size);
4007 }
4008
4009 bool Assembler::IsImmLSPair(int64_t offset, unsigned size) {
4010 bool offset_is_size_multiple =
4011 (static_cast<int64_t>(static_cast<uint64_t>(offset >> size) << size) ==
4012 offset);
4013 return offset_is_size_multiple && is_int7(offset >> size);
4014 }
4015
4016 bool Assembler::IsImmLLiteral(int64_t offset) {
4017 int inst_size = static_cast<int>(kInstrSizeLog2);
4018 bool offset_is_inst_multiple =
4019 (static_cast<int64_t>(static_cast<uint64_t>(offset >> inst_size)
4020 << inst_size) == offset);
4021 DCHECK_GT(offset, 0);
4022 offset >>= kLoadLiteralScaleLog2;
4023 return offset_is_inst_multiple && is_intn(offset, ImmLLiteral_width);
4024 }
4025
4026 // Test if a given value can be encoded in the immediate field of a logical
4027 // instruction.
4028 // If it can be encoded, the function returns true, and values pointed to by n,
4029 // imm_s and imm_r are updated with immediates encoded in the format required
4030 // by the corresponding fields in the logical instruction.
4031 // If it can not be encoded, the function returns false, and the values pointed
4032 // to by n, imm_s and imm_r are undefined.
4033 bool Assembler::IsImmLogical(uint64_t value, unsigned width, unsigned* n,
4034 unsigned* imm_s, unsigned* imm_r) {
4035 DCHECK((n != nullptr) && (imm_s != nullptr) && (imm_r != nullptr));
4036 DCHECK((width == kWRegSizeInBits) || (width == kXRegSizeInBits));
4037
4038 bool negate = false;
4039
4040 // Logical immediates are encoded using parameters n, imm_s and imm_r using
4041 // the following table:
4042 //
4043 // N imms immr size S R
4044 // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
4045 // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
4046 // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
4047 // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
4048 // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
4049 // 0 11110s xxxxxr 2 UInt(s) UInt(r)
4050 // (s bits must not be all set)
4051 //
4052 // A pattern is constructed of size bits, where the least significant S+1 bits
4053 // are set. The pattern is rotated right by R, and repeated across a 32 or
4054 // 64-bit value, depending on destination register width.
4055 //
4056 // Put another way: the basic format of a logical immediate is a single
4057 // contiguous stretch of 1 bits, repeated across the whole word at intervals
4058 // given by a power of 2. To identify them quickly, we first locate the
4059 // lowest stretch of 1 bits, then the next 1 bit above that; that combination
4060 // is different for every logical immediate, so it gives us all the
4061 // information we need to identify the only logical immediate that our input
4062 // could be, and then we simply check if that's the value we actually have.
4063 //
4064 // (The rotation parameter does give the possibility of the stretch of 1 bits
4065 // going 'round the end' of the word. To deal with that, we observe that in
4066 // any situation where that happens the bitwise NOT of the value is also a
4067 // valid logical immediate. So we simply invert the input whenever its low bit
4068 // is set, and then we know that the rotated case can't arise.)
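  //
  // For example, 0x00FF00FF00FF00FF (eight set bits repeated every 16 bits)
  // is encodable, whereas 0x0000000000001234 is not, because its set bits do
  // not form a single repeated run.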
4069
4070 if (value & 1) {
4071 // If the low bit is 1, negate the value, and set a flag to remember that we
4072 // did (so that we can adjust the return values appropriately).
4073 negate = true;
4074 value = ~value;
4075 }
4076
4077 if (width == kWRegSizeInBits) {
4078 // To handle 32-bit logical immediates, the very easiest thing is to repeat
4079 // the input value twice to make a 64-bit word. The correct encoding of that
4080 // as a logical immediate will also be the correct encoding of the 32-bit
4081 // value.
4082
4083     // The most-significant 32 bits may not be zero (i.e. negate is true), so
4084 // shift the value left before duplicating it.
4085 value <<= kWRegSizeInBits;
4086 value |= value >> kWRegSizeInBits;
4087 }
4088
4089 // The basic analysis idea: imagine our input word looks like this.
4090 //
4091 // 0011111000111110001111100011111000111110001111100011111000111110
4092 // c b a
4093 // |<--d-->|
4094 //
4095 // We find the lowest set bit (as an actual power-of-2 value, not its index)
4096 // and call it a. Then we add a to our original number, which wipes out the
4097 // bottommost stretch of set bits and replaces it with a 1 carried into the
4098 // next zero bit. Then we look for the new lowest set bit, which is in
4099 // position b, and subtract it, so now our number is just like the original
4100 // but with the lowest stretch of set bits completely gone. Now we find the
4101 // lowest set bit again, which is position c in the diagram above. Then we'll
4102 // measure the distance d between bit positions a and c (using CLZ), and that
4103 // tells us that the only valid logical immediate that could possibly be equal
4104 // to this number is the one in which a stretch of bits running from a to just
4105 // below b is replicated every d bits.
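  //
  // Worked example: an input of 0x00FF00FF00FF00FF is inverted above to
  // 0xFF00FF00FF00FF00, for which a = 0x100, b = 0x10000, c = 0x1000000 and
  // hence d = clz(a) - clz(c) = 55 - 39 = 16.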
4106 uint64_t a = LargestPowerOf2Divisor(value);
4107 uint64_t value_plus_a = value + a;
4108 uint64_t b = LargestPowerOf2Divisor(value_plus_a);
4109 uint64_t value_plus_a_minus_b = value_plus_a - b;
4110 uint64_t c = LargestPowerOf2Divisor(value_plus_a_minus_b);
4111
4112 int d, clz_a, out_n;
4113 uint64_t mask;
4114
4115 if (c != 0) {
4116 // The general case, in which there is more than one stretch of set bits.
4117 // Compute the repeat distance d, and set up a bitmask covering the basic
4118 // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
4119 // of these cases the N bit of the output will be zero.
4120 clz_a = CountLeadingZeros(a, kXRegSizeInBits);
4121 int clz_c = CountLeadingZeros(c, kXRegSizeInBits);
4122 d = clz_a - clz_c;
4123 mask = ((uint64_t{1} << d) - 1);
4124 out_n = 0;
4125 } else {
4126 // Handle degenerate cases.
4127 //
4128 // If any of those 'find lowest set bit' operations didn't find a set bit at
4129 // all, then the value must have been zero from that point onwards, so in
4130 // particular the last such operation will have returned zero. So we can test
4131 // for all the special case conditions in one go by seeing if c is zero.
4132 if (a == 0) {
4133 // The input was zero (or all 1 bits, which also ends up here after the
4134 // inversion at the start of the function), for which we just return
4135 // false.
4136 return false;
4137 } else {
4138 // Otherwise, if c was zero but a was not, then there's just one stretch
4139 // of set bits in our word, meaning that we have the trivial case of
4140 // d == 64 and only one 'repetition'. Set up all the same variables as in
4141 // the general case above, and set the N bit in the output.
4142 clz_a = CountLeadingZeros(a, kXRegSizeInBits);
4143 d = 64;
4144 mask = ~uint64_t{0};
4145 out_n = 1;
4146 }
4147 }
4148
4149 // If the repeat period d is not a power of two, it can't be encoded.
4150 if (!base::bits::IsPowerOfTwo(d)) {
4151 return false;
4152 }
4153
4154 if (((b - a) & ~mask) != 0) {
4155 // If the bit stretch (b - a) does not fit within the mask derived from the
4156 // repeat period, then fail.
4157 return false;
4158 }
4159
4160 // The only possible option is b - a repeated every d bits. Now we're going to
4161 // actually construct the valid logical immediate derived from that
4162 // specification, and see if it equals our original input.
4163 //
4164 // To repeat a value every d bits, we multiply it by a number of the form
4165 // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
4166 // be derived using a table lookup on CLZ(d).
4167 static const uint64_t multipliers[] = {
4168 0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL,
4169 0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL,
4170 };
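// Continuing the illustrative example above: d == 16 gives
// CountLeadingZeros(16, 64) - 57 == 59 - 57 == 2, selecting
// 0x0001000100010001, and (b - a) == 0xF0 multiplied by it reproduces
// 0x00F000F000F000F0, confirming the input is a valid logical immediate.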
4171 int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;
4172 // Ensure that the index to the multipliers array is within bounds.
4173 DCHECK((multiplier_idx >= 0) &&
4174 (static_cast<size_t>(multiplier_idx) < arraysize(multipliers)));
4175 uint64_t multiplier = multipliers[multiplier_idx];
4176 uint64_t candidate = (b - a) * multiplier;
4177
4178 if (value != candidate) {
4179 // The candidate pattern doesn't match our input value, so fail.
4180 return false;
4181 }
4182
4183 // We have a match! This is a valid logical immediate, so now we have to
4184 // construct the bits and pieces of the instruction encoding that generates
4185 // it.
4186
4187 // Count the set bits in our basic stretch. The special case of clz(0) == -1
4188 // makes the answer come out right for stretches that reach the very top of
4189 // the word (e.g. numbers like 0xFFFFC00000000000).
4190 int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits);
4191 int s = clz_a - clz_b;
4192
4193 // Decide how many bits to rotate right by, to put the low bit of that basic
4194 // stretch in position a.
4195 int r;
4196 if (negate) {
4197 // If we inverted the input right at the start of this function, here's
4198 // where we compensate: the number of set bits becomes the number of clear
4199 // bits, and the rotation count is based on position b rather than position
4200 // a (since b is the location of the 'lowest' 1 bit after inversion).
4201 s = d - s;
4202 r = (clz_b + 1) & (d - 1);
4203 } else {
4204 r = (clz_a + 1) & (d - 1);
4205 }
4206
4207 // Now we're done, except for having to encode the S output in such a way that
4208 // it gives both the number of set bits and the length of the repeated
4209 // segment. The s field is encoded like this:
4210 //
4211 // imms size S
4212 // ssssss 64 UInt(ssssss)
4213 // 0sssss 32 UInt(sssss)
4214 // 10ssss 16 UInt(ssss)
4215 // 110sss 8 UInt(sss)
4216 // 1110ss 4 UInt(ss)
4217 // 11110s 2 UInt(s)
4218 //
4219 // So we 'or' (-d * 2) with our computed (s - 1) to form imms.
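// For the illustrative example above (d == 16, s == 4, no inversion), this
// gives imms == ((-32) | 3) & 0x3F == 0b100011, matching the '10ssss' row,
// with n == 0 and imm_r == 12.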
4220 *n = out_n;
4221 *imm_s = ((-d * 2) | (s - 1)) & 0x3F;
4222 *imm_r = r;
4223
4224 return true;
4225 }
4226
4227 bool Assembler::IsImmConditionalCompare(int64_t immediate) {
4228 return is_uint5(immediate);
4229 }
4230
4231 bool Assembler::IsImmFP32(float imm) {
4232 // Valid values will have the form:
4233 // aBbb.bbbc.defg.h000.0000.0000.0000.0000
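// For example (illustrative values): 1.0f (0x3F800000) passes all three
// checks below, while 0.1f (0x3DCCCCCD) fails the low-bits check and is
// rejected.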
4234 uint32_t bits = bit_cast<uint32_t>(imm);
4235 // bits[19..0] are cleared.
4236 if ((bits & 0x7FFFF) != 0) {
4237 return false;
4238 }
4239
4240 // bits[29..25] are all set or all cleared.
4241 uint32_t b_pattern = (bits >> 16) & 0x3E00;
4242 if (b_pattern != 0 && b_pattern != 0x3E00) {
4243 return false;
4244 }
4245
4246 // bit[30] and bit[29] are opposite.
4247 if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
4248 return false;
4249 }
4250
4251 return true;
4252 }
4253
4254 bool Assembler::IsImmFP64(double imm) {
4255 // Valid values will have the form:
4256 // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
4257 // 0000.0000.0000.0000.0000.0000.0000.0000
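// For example (illustrative values): 1.0 (0x3FF0000000000000) and -2.0
// (0xC000000000000000) pass all three checks below, while 0.1
// (0x3FB999999999999A) fails the low-bits check.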
4258 uint64_t bits = bit_cast<uint64_t>(imm);
4259 // bits[47..0] are cleared.
4260 if ((bits & 0xFFFFFFFFFFFFL) != 0) {
4261 return false;
4262 }
4263
4264 // bits[61..54] are all set or all cleared.
4265 uint32_t b_pattern = (bits >> 48) & 0x3FC0;
4266 if (b_pattern != 0 && b_pattern != 0x3FC0) {
4267 return false;
4268 }
4269
4270 // bit[62] and bit[61] are opposite.
4271 if (((bits ^ (bits << 1)) & 0x4000000000000000L) == 0) {
4272 return false;
4273 }
4274
4275 return true;
4276 }
4277
4278 void Assembler::FixOnHeapReferences(bool update_embedded_objects) {
4279 Address base = reinterpret_cast<Address>(buffer_->start());
4280 if (update_embedded_objects) {
4281 for (auto p : saved_handles_for_raw_object_ptr_) {
4282 Handle<HeapObject> object = GetEmbeddedObject(p.second);
4283 WriteUnalignedValue(base + p.first, object->ptr());
4284 }
4285 }
4286 for (auto p : saved_offsets_for_runtime_entries_) {
4287 Instruction* instr = reinterpret_cast<Instruction*>(base + p.first);
4288 Address target = p.second * kInstrSize + options().code_range_start;
4289 DCHECK(is_int26(p.second));
4290 DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
4291 instr->SetBranchImmTarget(reinterpret_cast<Instruction*>(target));
4292 }
4293 }
4294
4295 void Assembler::FixOnHeapReferencesToHandles() {
4296 Address base = reinterpret_cast<Address>(buffer_->start());
4297 for (auto p : saved_handles_for_raw_object_ptr_) {
4298 WriteUnalignedValue(base + p.first, p.second);
4299 }
4300 saved_handles_for_raw_object_ptr_.clear();
4301 for (auto p : saved_offsets_for_runtime_entries_) {
4302 Instruction* instr = reinterpret_cast<Instruction*>(base + p.first);
4303 DCHECK(is_int26(p.second));
4304 DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
4305 instr->SetInstructionBits(instr->Mask(UnconditionalBranchMask) | p.second);
4306 }
4307 saved_offsets_for_runtime_entries_.clear();
4308 }
4309
4310 void Assembler::GrowBuffer() {
4311 bool previously_on_heap = buffer_->IsOnHeap();
4312 int previous_on_heap_gc_count = OnHeapGCCount();
4313
4314 // Compute new buffer size.
4315 int old_size = buffer_->size();
4316 int new_size = std::min(2 * old_size, old_size + 1 * MB);
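// In other words (illustrative sizes): the buffer doubles while it is small
// and then grows by at most 1 MB at a time, e.g. 256 KB -> 512 KB -> 1 MB ->
// 2 MB -> 3 MB -> ...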
4317
4318 // Some internal data structures overflow for very large buffers; because of
4319 // this, kMaximalBufferSize must be kept small enough to avoid the overflow.
4320 if (new_size > kMaximalBufferSize) {
4321 V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
4322 }
4323
4324 // Set up new buffer.
4325 std::unique_ptr<AssemblerBuffer> new_buffer = buffer_->Grow(new_size);
4326 DCHECK_EQ(new_size, new_buffer->size());
4327 byte* new_start = new_buffer->start();
4328
4329 // Copy the data.
4330 intptr_t pc_delta = new_start - buffer_start_;
4331 intptr_t rc_delta = (new_start + new_size) - (buffer_start_ + old_size);
4332 size_t reloc_size = (buffer_start_ + old_size) - reloc_info_writer.pos();
4333 memmove(new_start, buffer_start_, pc_offset());
4334 memmove(reloc_info_writer.pos() + rc_delta, reloc_info_writer.pos(),
4335 reloc_size);
4336
4337 // Switch buffers.
4338 buffer_ = std::move(new_buffer);
4339 buffer_start_ = new_start;
4340 pc_ += pc_delta;
4341 reloc_info_writer.Reposition(reloc_info_writer.pos() + rc_delta,
4342 reloc_info_writer.last_pc() + pc_delta);
4343
4344 // None of our relocation types are pc-relative pointing outside the code
4345 // buffer, nor pc-absolute pointing inside the code buffer, so there is no need
4346 // to relocate any emitted relocation entries.
4347
4348 // Relocate internal references.
4349 for (auto pos : internal_reference_positions_) {
4350 Address address = reinterpret_cast<intptr_t>(buffer_start_) + pos;
4351 intptr_t internal_ref = ReadUnalignedValue<intptr_t>(address);
4352 internal_ref += pc_delta;
4353 WriteUnalignedValue<intptr_t>(address, internal_ref);
4354 }
4355
4356 // Fix on-heap references.
4357 if (previously_on_heap) {
4358 if (buffer_->IsOnHeap()) {
4359 FixOnHeapReferences(previous_on_heap_gc_count != OnHeapGCCount());
4360 } else {
4361 FixOnHeapReferencesToHandles();
4362 }
4363 }
4364
4365 // Pending relocation entries are also relative, no need to relocate.
4366 }
4367
4368 void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data,
4369 ConstantPoolMode constant_pool_mode) {
4370 if ((rmode == RelocInfo::INTERNAL_REFERENCE) ||
4371 (rmode == RelocInfo::DATA_EMBEDDED_OBJECT) ||
4372 (rmode == RelocInfo::CONST_POOL) || (rmode == RelocInfo::VENEER_POOL) ||
4373 (rmode == RelocInfo::DEOPT_SCRIPT_OFFSET) ||
4374 (rmode == RelocInfo::DEOPT_INLINING_ID) ||
4375 (rmode == RelocInfo::DEOPT_REASON) || (rmode == RelocInfo::DEOPT_ID) ||
4376 (rmode == RelocInfo::LITERAL_CONSTANT) ||
4377 (rmode == RelocInfo::DEOPT_NODE_ID)) {
4378 // Adjust code for new modes.
4379 DCHECK(RelocInfo::IsDeoptReason(rmode) || RelocInfo::IsDeoptId(rmode) ||
4380 RelocInfo::IsDeoptNodeId(rmode) ||
4381 RelocInfo::IsDeoptPosition(rmode) ||
4382 RelocInfo::IsInternalReference(rmode) ||
4383 RelocInfo::IsDataEmbeddedObject(rmode) ||
4384 RelocInfo::IsLiteralConstant(rmode) ||
4385 RelocInfo::IsConstPool(rmode) || RelocInfo::IsVeneerPool(rmode));
4386 // These modes do not need an entry in the constant pool.
4387 } else if (constant_pool_mode == NEEDS_POOL_ENTRY) {
4388 if (RelocInfo::IsEmbeddedObjectMode(rmode)) {
4389 Handle<HeapObject> handle(reinterpret_cast<Address*>(data));
4390 data = AddEmbeddedObject(handle);
4391 }
4392 if (rmode == RelocInfo::COMPRESSED_EMBEDDED_OBJECT) {
4393 if (constpool_.RecordEntry(static_cast<uint32_t>(data), rmode) ==
4394 RelocInfoStatus::kMustOmitForDuplicate) {
4395 return;
4396 }
4397 } else {
4398 if (constpool_.RecordEntry(static_cast<uint64_t>(data), rmode) ==
4399 RelocInfoStatus::kMustOmitForDuplicate) {
4400 return;
4401 }
4402 }
4403 }
4404 // For modes that cannot use the constant pool, a different sequence of
4405 // instructions will be emitted by this function's caller.
4406
4407 if (!ShouldRecordRelocInfo(rmode)) return;
4408
4409 // Callers should ensure that constant pool emission is blocked until the
4410 // instruction the reloc info is associated with has been emitted.
4411 DCHECK(constpool_.IsBlocked());
4412
4413 // We do not try to reuse pool constants.
4414 RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, Code());
4415
4416 DCHECK_GE(buffer_space(), kMaxRelocSize); // too late to grow buffer here
4417 reloc_info_writer.Write(&rinfo);
4418 }
4419
4420 void Assembler::near_jump(int offset, RelocInfo::Mode rmode) {
4421 BlockPoolsScope no_pool_before_b_instr(this);
4422 if (!RelocInfo::IsNone(rmode)) RecordRelocInfo(rmode, offset, NO_POOL_ENTRY);
4423 b(offset);
4424 }
4425
4426 void Assembler::near_call(int offset, RelocInfo::Mode rmode) {
4427 BlockPoolsScope no_pool_before_bl_instr(this);
4428 if (!RelocInfo::IsNone(rmode)) RecordRelocInfo(rmode, offset, NO_POOL_ENTRY);
4429 bl(offset);
4430 }
4431
4432 void Assembler::near_call(HeapObjectRequest request) {
4433 BlockPoolsScope no_pool_before_bl_instr(this);
4434 RequestHeapObject(request);
4435 EmbeddedObjectIndex index = AddEmbeddedObject(Handle<Code>());
4436 RecordRelocInfo(RelocInfo::CODE_TARGET, index, NO_POOL_ENTRY);
4437 DCHECK(is_int32(index));
4438 bl(static_cast<int>(index));
4439 }
4440
4441 // Constant Pool
4442
4443 void ConstantPool::EmitPrologue(Alignment require_alignment) {
4444 // Recorded constant pool size is expressed in number of 32-bit words,
4445 // and includes prologue and alignment, but not the jump around the pool
4446 // or the size of the marker itself.
4447 const int marker_size = 1;
4448 int word_count =
4449 ComputeSize(Jump::kOmitted, require_alignment) / kInt32Size - marker_size;
4450 assm_->Emit(LDR_x_lit | Assembler::ImmLLiteral(word_count) |
4451 Assembler::Rt(xzr));
4452 assm_->EmitPoolGuard();
4453 }
4454
4455 int ConstantPool::PrologueSize(Jump require_jump) const {
4456 // Prologue is:
4457 // b over ;; if require_jump
4458 // ldr xzr, #pool_size
4459 // blr xzr
4460 int prologue_size = require_jump == Jump::kRequired ? kInstrSize : 0;
4461 prologue_size += 2 * kInstrSize;
4462 return prologue_size;
4463 }
4464
4465 void ConstantPool::SetLoadOffsetToConstPoolEntry(int load_offset,
4466 Instruction* entry_offset,
4467 const ConstantPoolKey& key) {
4468 Instruction* instr = assm_->InstructionAt(load_offset);
4469 // Instruction to patch must be 'ldr rd, [pc, #offset]' with offset == 0.
4470 DCHECK(instr->IsLdrLiteral() && instr->ImmLLiteral() == 0);
4471 instr->SetImmPCOffsetTarget(assm_->options(), entry_offset);
4472 }
4473
4474 void ConstantPool::Check(Emission force_emit, Jump require_jump,
4475 size_t margin) {
4476 // Some short instruction sequences must not be broken up by constant pool
4477 // emission; such sequences are protected by a ConstPool::BlockScope.
4478 if (IsBlocked()) {
4479 // Something is wrong if emission is forced and blocked at the same time.
4480 DCHECK_EQ(force_emit, Emission::kIfNeeded);
4481 return;
4482 }
4483
4484 // We emit a constant pool only if:
4485 // * it is not empty, and
4486 // * either emission is forced by parameter force_emit (e.g. at function end),
4487 //   or emission is mandatory or opportune according to {ShouldEmitNow}.
4488 if (!IsEmpty() && (force_emit == Emission::kForced ||
4489 ShouldEmitNow(require_jump, margin))) {
4490 // Emit veneers for branches that would go out of range during emission of
4491 // the constant pool.
4492 int worst_case_size = ComputeSize(Jump::kRequired, Alignment::kRequired);
4493 assm_->CheckVeneerPool(false, require_jump == Jump::kRequired,
4494 assm_->kVeneerDistanceMargin + worst_case_size +
4495 static_cast<int>(margin));
4496
4497 // Check that the code buffer is large enough before emitting the constant
4498 // pool (this includes the gap to the relocation information).
4499 int needed_space = worst_case_size + assm_->kGap;
4500 while (assm_->buffer_space() <= needed_space) {
4501 assm_->GrowBuffer();
4502 }
4503
4504 EmitAndClear(require_jump);
4505 }
4506 // Since the constant pool is (now) empty, move the check offset forward by
4507 // the standard interval.
4508 SetNextCheckIn(ConstantPool::kCheckInterval);
4509 }
4510
4511 // Pool entries are accessed with a pc-relative load, so this cannot be more
4512 // than 1 * MB. Since constant pool emission checks are interval based, and we
4513 // want to keep entries close to the code, we try to emit every 64 KB.
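// (An LDR (literal) instruction encodes a signed 19-bit word offset, which is
// what limits the reach to +/-1 MB from the load.)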
4514 const size_t ConstantPool::kMaxDistToPool32 = 1 * MB;
4515 const size_t ConstantPool::kMaxDistToPool64 = 1 * MB;
4516 const size_t ConstantPool::kCheckInterval = 128 * kInstrSize;
4517 const size_t ConstantPool::kApproxDistToPool32 = 64 * KB;
4518 const size_t ConstantPool::kApproxDistToPool64 = kApproxDistToPool32;
4519
4520 const size_t ConstantPool::kOpportunityDistToPool32 = 64 * KB;
4521 const size_t ConstantPool::kOpportunityDistToPool64 = 64 * KB;
4522 const size_t ConstantPool::kApproxMaxEntryCount = 512;
4523
4524 intptr_t Assembler::MaxPCOffsetAfterVeneerPoolIfEmittedNow(size_t margin) {
4525 // Account for the branch and guard around the veneers.
4526 static constexpr int kBranchSizeInBytes = kInstrSize;
4527 static constexpr int kGuardSizeInBytes = kInstrSize;
4528 const size_t max_veneer_size_in_bytes =
4529 unresolved_branches_.size() * kVeneerCodeSize;
4530 return static_cast<intptr_t>(pc_offset() + kBranchSizeInBytes +
4531 kGuardSizeInBytes + max_veneer_size_in_bytes +
4532 margin);
4533 }
4534
4535 void Assembler::RecordVeneerPool(int location_offset, int size) {
4536 Assembler::BlockPoolsScope block_pools(this, PoolEmissionCheck::kSkip);
4537 RelocInfo rinfo(reinterpret_cast<Address>(buffer_start_) + location_offset,
4538 RelocInfo::VENEER_POOL, static_cast<intptr_t>(size), Code());
4539 reloc_info_writer.Write(&rinfo);
4540 }
4541
4542 void Assembler::EmitVeneers(bool force_emit, bool need_protection,
4543 size_t margin) {
4544 ASM_CODE_COMMENT(this);
4545 BlockPoolsScope scope(this, PoolEmissionCheck::kSkip);
4546
4547 // The exact size of the veneer pool must be recorded (see the comment at the
4548 // declaration site of RecordConstPool()), but computing the number of
4549 // veneers that will be generated is not obvious. So instead we remember the
4550 // current position and will record the size after the pool has been
4551 // generated.
4552 Label size_check;
4553 bind(&size_check);
4554 int veneer_pool_relocinfo_loc = pc_offset();
4555
4556 Label end;
4557 if (need_protection) {
4558 b(&end);
4559 }
4560
4561 EmitVeneersGuard();
4562
4563 // We only emit veneers if needed (unless emission is forced), i.e. when the
4564 // max-reachable-pc of the branch has been exhausted by the current codegen
4565 // state. Specifically, we emit when the max-reachable-pc of the branch <= the
4566 // max-pc-after-veneers (over-approximated).
4567 const intptr_t max_pc_after_veneers =
4568 MaxPCOffsetAfterVeneerPoolIfEmittedNow(margin);
4569
4570 // The `unresolved_branches_` multimap is sorted by max-reachable-pc in
4571 // ascending order. For efficiency reasons, we want to call
4572 // RemoveBranchFromLabelLinkChain in descending order. The actual veneers are
4573 // then generated in ascending order.
4574 // TODO(jgruber): This is still inefficient in multiple ways, thoughts on how
4575 // we could improve in the future:
4576 // - Don't erase individual elements from the multimap, erase a range instead.
4577 // - Replace the multimap by a simpler data structure (like a plain vector or
4578 // a circular array).
4579 // - Refactor s.t. RemoveBranchFromLabelLinkChain does not need the linear
4580 // lookup in the link chain.
4581
4582 static constexpr int kStaticTasksSize = 16; // Arbitrary.
4583 base::SmallVector<FarBranchInfo, kStaticTasksSize> tasks;
4584
4585 {
4586 auto it = unresolved_branches_.begin();
4587 while (it != unresolved_branches_.end()) {
4588 const int max_reachable_pc = it->first;
4589 if (!force_emit && max_reachable_pc > max_pc_after_veneers) break;
4590
4591 // Found a task. We'll emit a veneer for this.
4592 tasks.emplace_back(it->second);
4593 auto eraser_it = it++;
4594 unresolved_branches_.erase(eraser_it);
4595 }
4596 }
4597
4598 // Update next_veneer_pool_check_ (tightly coupled with unresolved_branches_).
4599 if (unresolved_branches_.empty()) {
4600 next_veneer_pool_check_ = kMaxInt;
4601 } else {
4602 next_veneer_pool_check_ =
4603 unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
4604 }
4605
4606 // Reminder: We iterate in reverse order to avoid duplicate linked-list
4607 // iteration in RemoveBranchFromLabelLinkChain (which starts at the target
4608 // label, and iterates backwards through linked branch instructions).
4609
4610 const int tasks_size = static_cast<int>(tasks.size());
4611 for (int i = tasks_size - 1; i >= 0; i--) {
4612 Instruction* branch = InstructionAt(tasks[i].pc_offset_);
4613 Instruction* veneer = reinterpret_cast<Instruction*>(
4614 reinterpret_cast<uintptr_t>(pc_) + i * kVeneerCodeSize);
4615 RemoveBranchFromLabelLinkChain(branch, tasks[i].label_, veneer);
4616 }
4617
4618 // Now emit the actual veneer and patch up the incoming branch.
4619
4620 for (const FarBranchInfo& info : tasks) {
4621 #ifdef DEBUG
4622 Label veneer_size_check;
4623 bind(&veneer_size_check);
4624 #endif
4625 Instruction* branch = InstructionAt(info.pc_offset_);
4626 Instruction* veneer = reinterpret_cast<Instruction*>(pc_);
4627 branch->SetImmPCOffsetTarget(options(), veneer);
4628 b(info.label_); // This may end up pointing at yet another veneer later on.
4629 DCHECK_EQ(SizeOfCodeGeneratedSince(&veneer_size_check),
4630 static_cast<uint64_t>(kVeneerCodeSize));
4631 }
4632
4633 // Record the veneer pool size.
4634 int pool_size = static_cast<int>(SizeOfCodeGeneratedSince(&size_check));
4635 RecordVeneerPool(veneer_pool_relocinfo_loc, pool_size);
4636
4637 bind(&end);
4638 }
4639
4640 void Assembler::CheckVeneerPool(bool force_emit, bool require_jump,
4641 size_t margin) {
4642 // There is nothing to do if there are no pending veneer pool entries.
4643 if (unresolved_branches_.empty()) {
4644 DCHECK_EQ(next_veneer_pool_check_, kMaxInt);
4645 return;
4646 }
4647
4648 DCHECK(pc_offset() < unresolved_branches_first_limit());
4649
4650 // Some short instruction sequences must not be broken up by veneer pool
4651 // emission; such sequences are protected by calls to BlockVeneerPoolFor and
4652 // BlockVeneerPoolScope.
4653 if (is_veneer_pool_blocked()) {
4654 DCHECK(!force_emit);
4655 return;
4656 }
4657
4658 if (!require_jump) {
4659 // Prefer emitting veneers protected by an existing instruction.
4660 margin *= kVeneerNoProtectionFactor;
4661 }
4662 if (force_emit || ShouldEmitVeneers(margin)) {
4663 EmitVeneers(force_emit, require_jump, margin);
4664 } else {
4665 next_veneer_pool_check_ =
4666 unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
4667 }
4668 }
4669
4670 int Assembler::buffer_space() const {
4671 return static_cast<int>(reloc_info_writer.pos() - pc_);
4672 }
4673
4674 void Assembler::RecordConstPool(int size) {
4675 // We only need this for debugger support, to correctly compute offsets in the
4676 // code.
4677 Assembler::BlockPoolsScope block_pools(this);
4678 RecordRelocInfo(RelocInfo::CONST_POOL, static_cast<intptr_t>(size));
4679 }
4680
4681 void PatchingAssembler::PatchAdrFar(int64_t target_offset) {
4682 // The code at the current instruction should be:
4683 // adr rd, 0
4684 // nop (adr_far)
4685 // nop (adr_far)
4686 // movz scratch, 0
4687
4688 // Verify the expected code.
4689 Instruction* expected_adr = InstructionAt(0);
4690 CHECK(expected_adr->IsAdr() && (expected_adr->ImmPCRel() == 0));
4691 int rd_code = expected_adr->Rd();
4692 for (int i = 0; i < kAdrFarPatchableNNops; ++i) {
4693 CHECK(InstructionAt((i + 1) * kInstrSize)->IsNop(ADR_FAR_NOP));
4694 }
4695 Instruction* expected_movz =
4696 InstructionAt((kAdrFarPatchableNInstrs - 1) * kInstrSize);
4697 CHECK(expected_movz->IsMovz() && (expected_movz->ImmMoveWide() == 0) &&
4698 (expected_movz->ShiftMoveWide() == 0));
4699 int scratch_code = expected_movz->Rd();
4700
4701 // Patch to load the correct address.
4702 Register rd = Register::XRegFromCode(rd_code);
4703 Register scratch = Register::XRegFromCode(scratch_code);
4704 // Addresses are only 48 bits.
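// For example (hypothetical offset): target_offset == 0x0000123456789ABC is
// materialized as adr rd, #0x9ABC; movz scratch, #0x5678, lsl #16;
// movk scratch, #0x1234, lsl #32; add rd, rd, scratch.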
4705 adr(rd, target_offset & 0xFFFF);
4706 movz(scratch, (target_offset >> 16) & 0xFFFF, 16);
4707 movk(scratch, (target_offset >> 32) & 0xFFFF, 32);
4708 DCHECK_EQ(target_offset >> 48, 0);
4709 add(rd, rd, scratch);
4710 }
4711
4712 void PatchingAssembler::PatchSubSp(uint32_t immediate) {
4713 // The code at the current instruction should be:
4714 // sub sp, sp, #0
4715
4716 // Verify the expected code.
4717 Instruction* expected_adr = InstructionAt(0);
4718 CHECK(expected_adr->IsAddSubImmediate());
4719 sub(sp, sp, immediate);
4720 }
4721
4722 #undef NEON_3DIFF_LONG_LIST
4723 #undef NEON_3DIFF_HN_LIST
4724 #undef NEON_ACROSSLANES_LIST
4725 #undef NEON_FP2REGMISC_FCVT_LIST
4726 #undef NEON_FP2REGMISC_LIST
4727 #undef NEON_3SAME_LIST
4728 #undef NEON_FP3SAME_LIST_V2
4729 #undef NEON_BYELEMENT_LIST
4730 #undef NEON_FPBYELEMENT_LIST
4731 #undef NEON_BYELEMENT_LONG_LIST
4732
4733 } // namespace internal
4734 } // namespace v8
4735
4736 #endif // V8_TARGET_ARCH_ARM64
4737