/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_shared_IonAssemblerBufferWithConstantPools_h
#define jit_shared_IonAssemblerBufferWithConstantPools_h

#include "mozilla/MathAlgorithms.h"

#include <algorithm>

#include "jit/JitSpewer.h"
#include "jit/shared/IonAssemblerBuffer.h"

// This code extends the AssemblerBuffer to support the pooling of values
// loaded using program-counter relative addressing modes. This is necessary
// with the ARM instruction set because it has a fixed instruction size that
// cannot encode all values as immediate arguments in instructions. Pooling
// the values allows the values to be placed in large chunks which minimizes
// the number of forced branches around them in the code. This is used for
// loading floating point constants, for loading 32-bit constants on the
// ARMv6, for absolute branch targets, and in the future will be needed for
// large branches on the ARMv6.
//
// For simplicity of the implementation, the constant pools are always placed
// after the loads referencing them. When a new constant pool load is added to
// the assembler buffer, a corresponding pool entry is added to the current
// pending pool. The finishPool() method copies the current pending pool
// entries into the assembler buffer at the current offset and patches the
// pending constant pool load instructions.
//
// Before inserting instructions or pool entries, it is necessary to determine
// if doing so would place a pending pool entry out of reach of an
// instruction, and if so then the pool must firstly be dumped. With the
// allocation algorithm used below, the recalculation of all the distances
// between instructions and their pool entries can be avoided by noting that
// there will be a limiting instruction and pool entry pair that does not
// change when inserting more instructions. Adding more instructions makes the
// same increase to the distance, between instructions and their pool entries,
// for all such pairs. This pair is recorded as the limiter, and it is updated
// when new pool entries are added, see updateLimiter().
//
// The pools consist of: a guard instruction that branches around the pool, a
// header word that helps identify a pool in the instruction stream, and then
// the pool entries allocated in units of words. The guard instruction could
// be omitted if control does not reach the pool, and this is referred to as a
// natural guard below, but for simplicity the guard branch is always emitted.
// The pool header is an identifiable word that in combination with the guard
// uniquely identifies a pool in the instruction stream. The header also
// encodes the pool size and a flag indicating if the guard is natural. It is
// possible to iterate through the code instructions skipping or examining the
// pools. E.g. it might be necessary to skip pools when searching for, or
// patching, an instruction sequence.
//
// It is often required to keep a reference to a pool entry, to patch it after
// the buffer is finished. Each pool entry is assigned a unique index, counting
// up from zero (see the poolEntryCount slot below). These can be mapped back
// to the offset of the pool entry in the finished buffer, see
// poolEntryOffset().
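//
// For orientation, a dumped pool looks roughly like this in the instruction
// stream (an illustrative sketch in pseudo-assembly; the exact header
// encoding is defined by Asm::WritePoolHeader()):
//
//     load r0, [pc + #offset-to-entry0]  ; patched when the pool is placed
//     ...more code...
//     b after_pool                       ; guard branch around the pool
//     .word pool_header                  ; encodes size and natural-guard flag
//     .word entry0                       ; pool data, index 0
//     .word entry1                       ; pool data, index 1
//   after_pool: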
//
// The code supports no-pool regions, and for these the size of the region,
// in instructions, must be supplied. This size is used to determine if
// inserting the instructions would place a pool entry out of range, and if so
// then a pool is firstly flushed. The DEBUG code checks that the emitted code
// is within the supplied size to detect programming errors. See enterNoPool()
// and leaveNoPool().

// The only planned instruction sets that require inline constant pools are
// the ARM, ARM64, and MIPS, and these all have fixed 32-bit sized
// instructions, so for simplicity the code below is specialized for fixed
// 32-bit sized instructions and makes no attempt to support variable length
// instructions. The base assembler buffer which supports variable-width
// instructions is used by the x86 and x64 backends.

// The AssemblerBufferWithConstantPools template class uses static callbacks
// to the provided Asm template argument class:
//
// void Asm::InsertIndexIntoTag(uint8_t* load_, uint32_t index)
//
//   When allocEntry() is called to add a constant pool load with an
//   associated constant pool entry, this callback is called to encode the
//   index of the allocated constant pool entry into the load instruction.
//
//   After the constant pool has been placed, PatchConstantPoolLoad() is
//   called to update the load instruction with the right load offset.
//
// void Asm::WritePoolGuard(BufferOffset branch,
//                          Instruction* dest,
//                          BufferOffset afterPool)
//
//   Write out the constant pool guard branch before emitting the pool.
//
//   branch
//     Offset of the guard branch in the buffer.
//
//   dest
//     Pointer into the buffer where the guard branch should be emitted. (Same
//     as getInst(branch)). Space for guardSize_ instructions has been
//     reserved.
//
//   afterPool
//     Offset of the first instruction after the constant pool. This includes
//     both pool entries and branch veneers added after the pool data.
//
// void Asm::WritePoolHeader(uint8_t* start, Pool* p, bool isNatural)
//
//   Write out the pool header which follows the guard branch.
//
// void Asm::PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr)
//
//   Re-encode a load of a constant pool entry after the location of the
//   constant pool is known.
//
//   The load instruction at loadAddr was previously passed to
//   InsertIndexIntoTag(). The constPoolAddr is the final address of the
//   constant pool in the assembler buffer.
//
// void Asm::PatchShortRangeBranchToVeneer(AssemblerBufferWithConstantPools*,
//                                         unsigned rangeIdx,
//                                         BufferOffset deadline,
//                                         BufferOffset veneer)
//
//   Patch a short-range branch to jump through a veneer before it goes out
//   of range.
//
//   rangeIdx, deadline
//     These arguments were previously passed to registerBranchDeadline(). It
//     is assumed that PatchShortRangeBranchToVeneer() knows how to compute
//     the offset of the short-range branch from this information.
//
//   veneer
//     Space for a branch veneer, guaranteed to be <= deadline. At this
//     position, guardSize_ * InstSize bytes are allocated. They should be
//     initialized to the proper unconditional branch instruction.
//
//   Unbound branches to the same unbound label are organized as a linked
//   list:
//
//     Label::offset -> Branch1 -> Branch2 -> Branch3 -> nil
//
//   This callback should insert a new veneer branch into the list:
//
//     Label::offset -> Branch1 -> Branch2 -> Veneer -> Branch3 -> nil
//
//   When Assembler::bind() rewrites the branches with the real label offset,
//   it probably has to bind Branch2 to target the veneer branch instead of
//   jumping straight to the label.
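//
// As a rough sketch (the names here are illustrative, not actual backend
// code), an Asm class providing these callbacks has the following shape,
// where MyBuffer is the AssemblerBufferWithConstantPools instantiation
// defined below:
//
//   struct MyAsm {
//     static void InsertIndexIntoTag(uint8_t* load, uint32_t index);
//     static void WritePoolGuard(BufferOffset branch, Instruction* dest,
//                                BufferOffset afterPool);
//     static void WritePoolHeader(uint8_t* start, Pool* p, bool isNatural);
//     static void PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr);
//     static void PatchShortRangeBranchToVeneer(MyBuffer* buffer,
//                                               unsigned rangeIdx,
//                                               BufferOffset deadline,
//                                               BufferOffset veneer);
//   };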

namespace js {
namespace jit {

// BranchDeadlineSet - Keep track of pending branch deadlines.
//
// Some architectures like arm and arm64 have branch instructions with limited
// range. When assembling a forward branch, it is not always known if the
// final target label will be in range of the branch instruction.
//
// The BranchDeadlineSet data structure is used to keep track of the set of
// pending forward branches. It supports the following fast operations:
//
// 1. Get the earliest deadline in the set.
// 2. Add a new branch deadline.
// 3. Remove a branch deadline.
//
// Architectures may have different branch encodings with different ranges.
// Each supported range is assigned a small integer starting at 0. This data
// structure does not care about the actual range of branch instructions,
// just the latest buffer offset that can be reached - the deadline offset.
//
// Branches are stored as (rangeIdx, deadline) tuples. The target-specific
// code can compute the location of the branch itself from this information.
// This data structure does not need to know.
//
template <unsigned NumRanges>
class BranchDeadlineSet {
  // Maintain a list of pending deadlines for each range separately.
  //
  // The offsets in each vector are always kept in ascending order.
  //
  // Because we have a separate vector for different ranges, as forward
  // branches are added to the assembler buffer, their deadlines will always
  // be appended to the vector corresponding to their range.
  //
  // When binding labels, we expect a more-or-less LIFO order of branch
  // resolutions. This would always hold if we had strictly structured
  // control flow.
  //
  // We allow branch deadlines to be added and removed in any order, but
  // performance is best in the expected case of near LIFO order.
  //
  typedef Vector<BufferOffset, 8, LifoAllocPolicy<Fallible>> RangeVector;

  // We really just want "RangeVector deadline_[NumRanges];", but each vector
  // needs to be initialized with a LifoAlloc, and C++ doesn't bend that way.
  //
  // Use raw aligned storage instead and explicitly construct NumRanges
  // vectors in our constructor.
  mozilla::AlignedStorage2<RangeVector[NumRanges]> deadlineStorage_;

  // Always access the range vectors through this method.
  RangeVector& vectorForRange(unsigned rangeIdx) {
    MOZ_ASSERT(rangeIdx < NumRanges, "Invalid branch range index");
    return (*deadlineStorage_.addr())[rangeIdx];
  }

  const RangeVector& vectorForRange(unsigned rangeIdx) const {
    MOZ_ASSERT(rangeIdx < NumRanges, "Invalid branch range index");
    return (*deadlineStorage_.addr())[rangeIdx];
  }

  // Maintain a precomputed earliest deadline at all times.
  // This is unassigned only when all deadline vectors are empty.
  BufferOffset earliest_;

  // The range vector owning earliest_. Uninitialized when empty.
  unsigned earliestRange_;

  // Recompute the earliest deadline after it's been invalidated.
  void recomputeEarliest() {
    earliest_ = BufferOffset();
    for (unsigned r = 0; r < NumRanges; r++) {
      auto& vec = vectorForRange(r);
      if (!vec.empty() && (!earliest_.assigned() || vec[0] < earliest_)) {
        earliest_ = vec[0];
        earliestRange_ = r;
      }
    }
  }

  // Update the earliest deadline if needed after inserting (rangeIdx,
  // deadline). Always return true for convenience:
  // return insert() && updateEarliest().
  bool updateEarliest(unsigned rangeIdx, BufferOffset deadline) {
    if (!earliest_.assigned() || deadline < earliest_) {
      earliest_ = deadline;
      earliestRange_ = rangeIdx;
    }
    return true;
  }

 public:
  explicit BranchDeadlineSet(LifoAlloc& alloc) {
    // Manually construct vectors in the uninitialized aligned storage.
    // This is because C++ arrays can otherwise only be constructed with
    // the default constructor.
    for (unsigned r = 0; r < NumRanges; r++)
      new (&vectorForRange(r)) RangeVector(alloc);
  }

  ~BranchDeadlineSet() {
    // Aligned storage doesn't destruct its contents automatically.
    for (unsigned r = 0; r < NumRanges; r++) vectorForRange(r).~RangeVector();
  }

  // Is this set completely empty?
  bool empty() const { return !earliest_.assigned(); }

  // Get the total number of deadlines in the set.
  size_t size() const {
    size_t count = 0;
    for (unsigned r = 0; r < NumRanges; r++)
      count += vectorForRange(r).length();
    return count;
  }

  // Get the number of deadlines for the range with the most elements.
  size_t maxRangeSize() const {
    size_t count = 0;
    for (unsigned r = 0; r < NumRanges; r++)
      count = std::max(count, vectorForRange(r).length());
    return count;
  }

  // Get the first deadline that is still in the set.
  BufferOffset earliestDeadline() const {
    MOZ_ASSERT(!empty());
    return earliest_;
  }

  // Get the range index corresponding to earliestDeadline().
  unsigned earliestDeadlineRange() const {
    MOZ_ASSERT(!empty());
    return earliestRange_;
  }

  // Add a (rangeIdx, deadline) tuple to the set.
  //
  // It is assumed that this tuple is not already in the set.
  // This function performs best if the added deadline is later than any
  // existing deadline for the same range index.
  //
  // Return true if the tuple was added, false if the tuple could not be
  // added because of an OOM error.
  bool addDeadline(unsigned rangeIdx, BufferOffset deadline) {
    MOZ_ASSERT(deadline.assigned(), "Can only store assigned buffer offsets");
    // This is the vector where deadline should be saved.
    auto& vec = vectorForRange(rangeIdx);

    // Fast case: Simple append to the relevant array. This never affects
    // the earliest deadline.
    if (!vec.empty() && vec.back() < deadline) return vec.append(deadline);

    // Fast case: First entry to the vector. We need to update earliest_.
    if (vec.empty())
      return vec.append(deadline) && updateEarliest(rangeIdx, deadline);

    return addDeadlineSlow(rangeIdx, deadline);
  }

 private:
  // General case of addDeadline. This is split into two functions such that
  // the common case in addDeadline can be inlined while this part probably
  // won't inline.
  bool addDeadlineSlow(unsigned rangeIdx, BufferOffset deadline) {
    auto& vec = vectorForRange(rangeIdx);

    // Inserting into the middle of the vector. Use a log time binary search
    // and a linear time insert().
    // Is it worthwhile special-casing the empty vector?
    auto at = std::lower_bound(vec.begin(), vec.end(), deadline);
    MOZ_ASSERT(at == vec.end() || *at != deadline,
               "Cannot insert duplicate deadlines");
    return vec.insert(at, deadline) && updateEarliest(rangeIdx, deadline);
  }

 public:
  // Remove a deadline from the set.
  // If (rangeIdx, deadline) is not in the set, nothing happens.
  void removeDeadline(unsigned rangeIdx, BufferOffset deadline) {
    auto& vec = vectorForRange(rangeIdx);

    if (vec.empty()) return;

    if (deadline == vec.back()) {
      // Expected fast case: Structured control flow causes forward
      // branches to be bound in reverse order.
      vec.popBack();
    } else {
      // Slow case: Binary search + linear erase.
      auto where = std::lower_bound(vec.begin(), vec.end(), deadline);
      if (where == vec.end() || *where != deadline) return;
      vec.erase(where);
    }
    if (deadline == earliest_) recomputeEarliest();
  }
};

// Specialization for architectures that don't need to track short-range
// branches.
template <>
class BranchDeadlineSet<0u> {
 public:
  explicit BranchDeadlineSet(LifoAlloc& alloc) {}
  bool empty() const { return true; }
  size_t size() const { return 0; }
  size_t maxRangeSize() const { return 0; }
  BufferOffset earliestDeadline() const { MOZ_CRASH(); }
  unsigned earliestDeadlineRange() const { MOZ_CRASH(); }
  bool addDeadline(unsigned rangeIdx, BufferOffset deadline) { MOZ_CRASH(); }
  void removeDeadline(unsigned rangeIdx, BufferOffset deadline) {
    MOZ_CRASH();
  }
};
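
// For illustration, the expected call pattern on the set looks roughly like
// this (hypothetical offsets and range indices; the real callers are the
// target-specific assemblers):
//
//   BranchDeadlineSet<2> set(alloc);
//   set.addDeadline(0, BufferOffset(128));    // Short-range branch, range 0.
//   set.addDeadline(1, BufferOffset(1024));   // Longer-range branch, range 1.
//   set.earliestDeadline();                   // BufferOffset(128).
//   set.removeDeadline(0, BufferOffset(128)); // The branch was bound in time.
//   set.earliestDeadline();                   // Now BufferOffset(1024).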

// The allocation unit size for pools.
typedef int32_t PoolAllocUnit;

// Hysteresis given to short-range branches.
//
// If any short-range branches will go out of range in the next N bytes,
// generate a veneer for them in the current pool. The hysteresis prevents the
// creation of many tiny constant pools for branch veneers.
const size_t ShortRangeBranchHysteresis = 128;

struct Pool {
 private:
  // The maximum program-counter relative offset below which the instruction
  // set can encode. Different classes of instructions might support different
  // ranges but for simplicity the minimum is used here, and for the ARM this
  // is constrained to 1024 by the float load instructions.
  const size_t maxOffset_;
  // An offset to apply to program-counter relative offsets. The ARM has a
  // bias of 8.
  const unsigned bias_;

  // The content of the pool entries.
  Vector<PoolAllocUnit, 8, LifoAllocPolicy<Fallible>> poolData_;

  // Flag that tracks OOM conditions. This is set after any append failed.
  bool oom_;

  // The limiting instruction and pool-entry pair. The instruction
  // program-counter relative offset of this limiting instruction will go out
  // of range first as the pool position moves forward. It is more efficient
  // to track just this limiting pair than to recheck all offsets when testing
  // if the pool needs to be dumped.
  //
  // 1. The actual offset of the limiting instruction referencing the
  //    limiting pool entry.
  BufferOffset limitingUser;
  // 2. The pool entry index of the limiting pool entry.
  unsigned limitingUsee;

 public:
  // A record of the code offset of instructions that reference pool
  // entries. These instructions need to be patched when the actual position
  // of the instructions and pools are known, and for the code below this
  // occurs when each pool is finished, see finishPool().
  Vector<BufferOffset, 8, LifoAllocPolicy<Fallible>> loadOffsets;

  // Create a Pool. Don't allocate anything from lifoAlloc, just capture its
  // reference.
  explicit Pool(size_t maxOffset, unsigned bias, LifoAlloc& lifoAlloc)
      : maxOffset_(maxOffset),
        bias_(bias),
        poolData_(lifoAlloc),
        oom_(false),
        limitingUser(),
        limitingUsee(INT_MIN),
        loadOffsets(lifoAlloc) {}

  // If poolData() returns nullptr then oom_ will also be true.
  const PoolAllocUnit* poolData() const { return poolData_.begin(); }

  unsigned numEntries() const { return poolData_.length(); }

  size_t getPoolSize() const { return numEntries() * sizeof(PoolAllocUnit); }

  bool oom() const { return oom_; }

  // Update the instruction/pool-entry pair that limits the position of the
  // pool. The nextInst is the actual offset of the new instruction being
  // allocated.
  //
  // This is comparing the offsets, see checkFull() below for the equation,
  // but common expressions on both sides have been canceled from the ranges
  // being compared. Notably, the poolOffset cancels out, so the limiting pair
  // does not depend on where the pool is placed.
  void updateLimiter(BufferOffset nextInst) {
    ptrdiff_t oldRange =
        limitingUsee * sizeof(PoolAllocUnit) - limitingUser.getOffset();
    ptrdiff_t newRange = getPoolSize() - nextInst.getOffset();
    if (!limitingUser.assigned() || newRange > oldRange) {
      // We have a new largest range!
      limitingUser = nextInst;
      limitingUsee = numEntries();
    }
  }
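
  // A worked example of the cancellation above, with illustrative numbers:
  // consider a user at offset 100 referencing entry index 0, and a user at
  // offset 200 referencing entry index 30. For a pool placed at poolOffset,
  // the two pc-relative distances are:
  //
  //   (poolOffset + 0 * 4) - 100   and   (poolOffset + 30 * 4) - 200
  //
  // Dropping the common poolOffset term leaves -100 and -80. The second pair
  // has the larger range, so it is the limiter: its distance is the first to
  // reach maxOffset_ as the pool moves forward.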

  // Check if inserting a pool at the actual offset poolOffset would place
  // pool entries out of reach. This is called before inserting instructions
  // to check that doing so would not push pool entries out of reach, and if
  // so then the pool would need to be firstly dumped. The poolOffset is the
  // first word of the pool, after the guard and header and alignment fill.
  bool checkFull(size_t poolOffset) const {
    // Not full if there are no uses.
    if (!limitingUser.assigned()) return false;
    size_t offset = poolOffset + limitingUsee * sizeof(PoolAllocUnit) -
                    (limitingUser.getOffset() + bias_);
    return offset >= maxOffset_;
  }

  static const unsigned OOM_FAIL = unsigned(-1);

  unsigned insertEntry(unsigned num, uint8_t* data, BufferOffset off,
                       LifoAlloc& lifoAlloc) {
    if (oom_) return OOM_FAIL;
    unsigned ret = numEntries();
    if (!poolData_.append((PoolAllocUnit*)data, num) ||
        !loadOffsets.append(off)) {
      oom_ = true;
      return OOM_FAIL;
    }
    return ret;
  }

  void reset() {
    poolData_.clear();
    loadOffsets.clear();

    limitingUser = BufferOffset();
    limitingUsee = -1;
  }
};

// Template arguments:
//
// SliceSize
//   Number of bytes in each allocated BufferSlice. See
//   AssemblerBuffer::SliceSize.
//
// InstSize
//   Size in bytes of the fixed-size instructions. This should be equal to
//   sizeof(Inst). This is only needed here because the buffer is defined
//   before the Instruction.
//
// Inst
//   The actual type used to represent instructions. This is only really used
//   as the return type of the getInst() method.
//
// Asm
//   Class defining the needed static callback functions. See documentation
//   of the Asm::* callbacks above.
//
// NumShortBranchRanges
//   The number of short branch ranges to support. This can be 0 if no
//   support for tracking short range branches is needed. The
//   AssemblerBufferWithConstantPools class does not need to know what the
//   range of branches is - it deals in branch 'deadlines', which are the
//   last buffer position that a short-range forward branch can reach. It is
//   assumed that the Asm class is able to find the actual branch instruction
//   given a (range-index, deadline) pair.
//
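// As a rough sketch, a 32-bit backend with 4-byte instructions might
// instantiate the buffer along these lines (parameter values here are
// illustrative, not taken from any particular backend):
//
//   typedef AssemblerBufferWithConstantPools<1024, 4, Instruction, Assembler,
//                                            /* NumShortBranchRanges= */ 1>
//       ARMBuffer;
//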
template <size_t SliceSize, size_t InstSize, class Inst, class Asm,
          unsigned NumShortBranchRanges = 0>
struct AssemblerBufferWithConstantPools
    : public AssemblerBuffer<SliceSize, Inst> {
 private:
  // The PoolEntry index counter. Each PoolEntry is given a unique index,
  // counting up from zero, and these can be mapped back to the actual pool
  // entry offset after finishing the buffer, see poolEntryOffset().
  size_t poolEntryCount;

 public:
  class PoolEntry {
    size_t index_;

   public:
    explicit PoolEntry(size_t index) : index_(index) {}

    PoolEntry() : index_(-1) {}

    size_t index() const { return index_; }
  };

 private:
  typedef AssemblerBuffer<SliceSize, Inst> Parent;
  using typename Parent::Slice;

  // The size of a pool guard, in instructions. A branch around the pool.
  const unsigned guardSize_;
  // The size of the header that is put at the beginning of a full pool, in
  // instruction sized units.
  const unsigned headerSize_;

  // The maximum pc relative offset encoded in instructions that reference
  // pool entries. This is generally set to the maximum offset that can be
  // encoded by the instructions, but for testing can be lowered to affect
  // the pool placement and frequency of pool placement.
  const size_t poolMaxOffset_;

  // The bias on pc relative addressing mode offsets, in units of bytes. The
  // ARM has a bias of 8 bytes.
  const unsigned pcBias_;

  // The current working pool. Copied out as needed before resetting.
  Pool pool_;

  // The buffer should be aligned to this address.
  const size_t instBufferAlign_;

  struct PoolInfo {
    // The index of the first entry in this pool.
    // Pool entries are numbered uniquely across all pools, starting from 0.
    unsigned firstEntryIndex;

    // The location of this pool's first entry in the main assembler buffer.
    // Note that the pool guard and header come before this offset which
    // points directly at the data.
    BufferOffset offset;

    explicit PoolInfo(unsigned index, BufferOffset data)
        : firstEntryIndex(index), offset(data) {}
  };

  // Info for each pool that has already been dumped. This does not include
  // any entries in pool_.
  Vector<PoolInfo, 8, LifoAllocPolicy<Fallible>> poolInfo_;

  // Set of short-range forward branches that have not yet been bound.
  // We may need to insert veneers if the final label turns out to be out of
  // range.
  //
  // This set stores (rangeIdx, deadline) pairs instead of the actual branch
  // locations.
  BranchDeadlineSet<NumShortBranchRanges> branchDeadlines_;

  // When true dumping pools is inhibited.
  bool canNotPlacePool_;

#ifdef DEBUG
  // State for validating the 'maxInst' argument to enterNoPool().
  // The buffer offset when entering the no-pool region.
  size_t canNotPlacePoolStartOffset_;
  // The maximum number of word sized instructions declared for the no-pool
  // region.
  size_t canNotPlacePoolMaxInst_;
#endif

  // Instruction to use for alignment fill.
  const uint32_t alignFillInst_;

  // Insert a number of NOP instructions between each requested instruction
  // at all locations at which a pool can potentially spill. This is useful
  // for checking that instruction locations are correctly referenced and/or
  // followed.
  const uint32_t nopFillInst_;
  const unsigned nopFill_;
  // For inhibiting the insertion of fill NOPs in the dynamic context in
  // which they are being inserted.
  bool inhibitNops_;

 public:
  // A unique id within each JitContext, to identify pools in the debug
  // spew. Set by the MacroAssembler, see getNextAssemblerId().
  int id;

 private:
  // The buffer slices are in a doubly linked list.
  Slice* getHead() const { return this->head; }
  Slice* getTail() const { return this->tail; }

 public:
  // Create an assembler buffer.
  // Note that this constructor is not allowed to actually allocate memory
  // from this->lifoAlloc_ because the MacroAssembler constructor has not yet
  // created an AutoJitContextAlloc.
  AssemblerBufferWithConstantPools(unsigned guardSize, unsigned headerSize,
                                   size_t instBufferAlign, size_t poolMaxOffset,
                                   unsigned pcBias, uint32_t alignFillInst,
                                   uint32_t nopFillInst, unsigned nopFill = 0)
      : poolEntryCount(0),
        guardSize_(guardSize),
        headerSize_(headerSize),
        poolMaxOffset_(poolMaxOffset),
        pcBias_(pcBias),
        pool_(poolMaxOffset, pcBias, this->lifoAlloc_),
        instBufferAlign_(instBufferAlign),
        poolInfo_(this->lifoAlloc_),
        branchDeadlines_(this->lifoAlloc_),
        canNotPlacePool_(false),
#ifdef DEBUG
        canNotPlacePoolStartOffset_(0),
        canNotPlacePoolMaxInst_(0),
#endif
        alignFillInst_(alignFillInst),
        nopFillInst_(nopFillInst),
        nopFill_(nopFill),
        inhibitNops_(false),
        id(-1) {
  }

  // We need to wait until an AutoJitContextAlloc is created by the
  // MacroAssembler before allocating any space.
  void initWithAllocator() {
    // We hand out references to lifoAlloc_ in the constructor.
    // Check that no allocations were made then.
    MOZ_ASSERT(this->lifoAlloc_.isEmpty(),
               "Illegal LIFO allocations before AutoJitContextAlloc");
  }

 private:
  size_t sizeExcludingCurrentPool() const {
    // Return the actual size of the buffer, excluding the current pending
    // pool.
    return this->nextOffset().getOffset();
  }

 public:
  size_t size() const {
    // Return the current actual size of the buffer. This is only accurate
    // if there are no pending pool entries to dump, check.
    MOZ_ASSERT_IF(!this->oom(), pool_.numEntries() == 0);
    return sizeExcludingCurrentPool();
  }

 private:
  void insertNopFill() {
    // Insert fill for testing.
    if (nopFill_ > 0 && !inhibitNops_ && !canNotPlacePool_) {
      inhibitNops_ = true;

      // Fill using a branch-nop rather than a NOP so this can be
      // distinguished and skipped.
      for (size_t i = 0; i < nopFill_; i++) putInt(nopFillInst_);

      inhibitNops_ = false;
    }
  }

  static const unsigned OOM_FAIL = unsigned(-1);
  static const unsigned DUMMY_INDEX = unsigned(-2);

  // Check if it is possible to add numInst instructions and numPoolEntries
  // constant pool entries without needing to flush the current pool.
  bool hasSpaceForInsts(unsigned numInsts, unsigned numPoolEntries) const {
    size_t nextOffset = sizeExcludingCurrentPool();
    // Earliest starting offset for the current pool after adding numInsts.
    // This is the beginning of the pool entries proper, after inserting a
    // guard branch + pool header.
    size_t poolOffset =
        nextOffset + (numInsts + guardSize_ + headerSize_) * InstSize;

    // Any constant pool loads that would go out of range?
    if (pool_.checkFull(poolOffset)) return false;

    // Any short-range branch that would go out of range?
    if (!branchDeadlines_.empty()) {
      size_t deadline = branchDeadlines_.earliestDeadline().getOffset();
      size_t poolEnd = poolOffset + pool_.getPoolSize() +
                       numPoolEntries * sizeof(PoolAllocUnit);

      // When NumShortBranchRanges > 1, it is possible for branch deadlines
      // to expire faster than we can insert veneers.
      // Suppose branches are 4 bytes each, we could have the following
      // deadline set:
      //
      //   Range 0: 40, 44, 48
      //   Range 1: 44, 48
      //
      // It is not good enough to start inserting veneers at the 40 deadline;
      // we would not be able to create veneers for the second 44 deadline.
      // Instead, we need to start at 32:
      //
      //   32: veneer(40)
      //   36: veneer(44)
      //   40: veneer(44)
      //   44: veneer(48)
      //   48: veneer(48)
      //
      // This is a pretty conservative solution to the problem: If we begin
      // at the earliest deadline, we can always emit all veneers for the
      // range that currently has the most pending deadlines. That may not
      // leave room for veneers for the remaining ranges, so reserve space
      // for those secondary range veneers assuming the worst case deadlines.

      // Total pending secondary range veneer size.
      size_t secondaryVeneers = guardSize_ * (branchDeadlines_.size() -
                                              branchDeadlines_.maxRangeSize());

      if (deadline < poolEnd + secondaryVeneers) return false;
    }

    return true;
  }

  unsigned insertEntryForwards(unsigned numInst, unsigned numPoolEntries,
                               uint8_t* inst, uint8_t* data) {
    // If inserting pool entries then find a new limiter before we do the
    // range check.
    if (numPoolEntries)
      pool_.updateLimiter(BufferOffset(sizeExcludingCurrentPool()));

    if (!hasSpaceForInsts(numInst, numPoolEntries)) {
      if (numPoolEntries)
        JitSpew(JitSpew_Pools, "[%d] Inserting pool entry caused a spill", id);
      else
        JitSpew(JitSpew_Pools, "[%d] Inserting instruction(%zu) caused a spill",
                id, sizeExcludingCurrentPool());

      finishPool();
      if (this->oom()) return OOM_FAIL;
      return insertEntryForwards(numInst, numPoolEntries, inst, data);
    }
    if (numPoolEntries) {
      unsigned result = pool_.insertEntry(numPoolEntries, data,
                                          this->nextOffset(), this->lifoAlloc_);
      if (result == Pool::OOM_FAIL) {
        this->fail_oom();
        return OOM_FAIL;
      }
      return result;
    }

    // The pool entry index is returned above when allocating an entry, but
    // when not allocating an entry a dummy value is returned - it is not
    // expected to be used by the caller.
    return DUMMY_INDEX;
  }

 public:
  // Get the next buffer offset where an instruction would be inserted.
  // This may flush the current constant pool before returning nextOffset().
  BufferOffset nextInstrOffset() {
    if (!hasSpaceForInsts(/* numInsts= */ 1, /* numPoolEntries= */ 0)) {
      JitSpew(JitSpew_Pools,
              "[%d] nextInstrOffset @ %d caused a constant pool spill", id,
              this->nextOffset().getOffset());
      finishPool();
    }
    return this->nextOffset();
  }

  MOZ_NEVER_INLINE
  BufferOffset allocEntry(size_t numInst, unsigned numPoolEntries,
                          uint8_t* inst, uint8_t* data,
                          PoolEntry* pe = nullptr) {
    // The allocation of pool entries is not supported in a no-pool region,
    // check.
    MOZ_ASSERT_IF(numPoolEntries, !canNotPlacePool_);

    if (this->oom() && !this->bail()) return BufferOffset();

    insertNopFill();

#ifdef JS_JITSPEW
    if (numPoolEntries && JitSpewEnabled(JitSpew_Pools)) {
      JitSpew(JitSpew_Pools, "[%d] Inserting %d entries into pool", id,
              numPoolEntries);
      JitSpewStart(JitSpew_Pools, "[%d] data is: 0x", id);
      size_t length = numPoolEntries * sizeof(PoolAllocUnit);
      for (unsigned idx = 0; idx < length; idx++) {
        JitSpewCont(JitSpew_Pools, "%02x", data[length - idx - 1]);
        if (((idx & 3) == 3) && (idx + 1 != length))
          JitSpewCont(JitSpew_Pools, "_");
      }
      JitSpewFin(JitSpew_Pools);
    }
#endif

    // Insert the pool value.
    unsigned index = insertEntryForwards(numInst, numPoolEntries, inst, data);
    if (this->oom()) return BufferOffset();

    // Now to get an instruction to write.
    PoolEntry retPE;
    if (numPoolEntries) {
      JitSpew(JitSpew_Pools, "[%d] Entry has index %u, offset %zu", id, index,
              sizeExcludingCurrentPool());
      Asm::InsertIndexIntoTag(inst, index);
      // Figure out the offset within the pool entries.
      retPE = PoolEntry(poolEntryCount);
      poolEntryCount += numPoolEntries;
    }
    // Now inst is a valid thing to insert into the instruction stream.
    if (pe != nullptr) *pe = retPE;
    return this->putBytes(numInst * InstSize, inst);
  }
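
  // A sketch of typical use from a backend (hypothetical encoding helper and
  // value; the real callers are the target assemblers): to emit a pc-relative
  // load of a 32-bit constant, pass one instruction and one pool entry. The
  // instruction's tag is rewritten via Asm::InsertIndexIntoTag(), and the
  // load is patched once the pool lands.
  //
  //   uint32_t load = encodePCRelativeLoad();  // Hypothetical helper.
  //   uint32_t value = 0xdeadbeef;             // The pool data.
  //   PoolEntry pe;
  //   BufferOffset offs =
  //       buffer.allocEntry(1, 1, (uint8_t*)&load, (uint8_t*)&value, &pe);
  //   // After finishing the buffer, poolEntryOffset(pe) gives the entry's
  //   // final offset.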

  // putInt is the workhorse for the assembler and higher-level buffer
  // abstractions: it places one instruction into the instruction stream.
  // Under normal circumstances putInt should just check that the constant
  // pool does not need to be flushed, that there is space for the single
  // word of the instruction, and write that word and update the buffer
  // pointer.
  //
  // To do better here we need a status variable that handles both nopFill_
  // and capacity, so that we can quickly know whether to go the slow path.
  // That could be a variable that has the remaining number of simple
  // instructions that can be inserted before a more expensive check,
  // which is set to zero when nopFill_ is set.
  //
  // We assume that we don't have to check this->oom() if there is space to
  // insert a plain instruction; there will always come a later time when it
  // will be checked anyway.

  MOZ_ALWAYS_INLINE
  BufferOffset putInt(uint32_t value) {
    if (nopFill_ ||
        !hasSpaceForInsts(/* numInsts= */ 1, /* numPoolEntries= */ 0))
      return allocEntry(1, 0, (uint8_t*)&value, nullptr, nullptr);

#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
    defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
    return this->putU32Aligned(value);
#else
    return this->AssemblerBuffer<SliceSize, Inst>::putInt(value);
#endif
  }

  // Register a short-range branch deadline.
  //
  // After inserting a short-range forward branch, call this method to
  // register the branch 'deadline' which is the last buffer offset that the
  // branch instruction can reach.
  //
  // When the branch is bound to a destination label, call
  // unregisterBranchDeadline() to stop tracking this branch.
  //
  // If the assembled code is about to exceed the registered branch deadline,
  // and unregisterBranchDeadline() has not yet been called, an
  // instruction-sized constant pool entry is allocated before the branch
  // deadline.
  //
  // rangeIdx
  //   A number < NumShortBranchRanges identifying the range of the branch.
  //
  // deadline
  //   The highest buffer offset that the short-range branch can reach
  //   directly.
  //
  void registerBranchDeadline(unsigned rangeIdx, BufferOffset deadline) {
    if (!this->oom() && !branchDeadlines_.addDeadline(rangeIdx, deadline))
      this->fail_oom();
  }

  // Un-register a short-range branch deadline.
  //
  // When a short-range branch has been successfully bound to its destination
  // label, call this function to stop tracking the branch.
  //
  // The (rangeIdx, deadline) pair must be previously registered.
  //
  void unregisterBranchDeadline(unsigned rangeIdx, BufferOffset deadline) {
    if (!this->oom()) branchDeadlines_.removeDeadline(rangeIdx, deadline);
  }
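
  // For example (a sketch; the range index and reach depend on the target's
  // branch encodings): after emitting a forward branch at offset 'off' whose
  // encoding can reach at most 32 KB ahead, a backend would register
  //
  //   buffer.registerBranchDeadline(rangeIdx,
  //                                 BufferOffset(off.getOffset() + 32768));
  //
  // and, once the label is bound and the branch is known to be in range,
  //
  //   buffer.unregisterBranchDeadline(rangeIdx,
  //                                   BufferOffset(off.getOffset() + 32768));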

 private:
  // Are any short-range branches about to expire?
  bool hasExpirableShortRangeBranches() const {
    if (branchDeadlines_.empty()) return false;

    // Include branches that would expire in the next N bytes.
    // The hysteresis avoids the needless creation of many tiny constant
    // pools.
    return this->nextOffset().getOffset() + ShortRangeBranchHysteresis >
           size_t(branchDeadlines_.earliestDeadline().getOffset());
  }

  void finishPool() {
    JitSpew(JitSpew_Pools,
            "[%d] Attempting to finish pool %zu with %u entries.", id,
            poolInfo_.length(), pool_.numEntries());

    if (pool_.numEntries() == 0 && !hasExpirableShortRangeBranches()) {
      // If there is no data in the pool being dumped, don't dump anything.
      JitSpew(JitSpew_Pools, "[%d] Aborting because the pool is empty", id);
      return;
    }

    // Should not be placing a pool in a no-pool region, check.
    MOZ_ASSERT(!canNotPlacePool_);

    // Dump the pool with a guard branch around the pool.
    BufferOffset guard = this->putBytes(guardSize_ * InstSize, nullptr);
    BufferOffset header = this->putBytes(headerSize_ * InstSize, nullptr);
    BufferOffset data = this->putBytesLarge(pool_.getPoolSize(),
                                            (const uint8_t*)pool_.poolData());
    if (this->oom()) return;

    // Now generate branch veneers for any short-range branches that are
    // about to expire.
    while (hasExpirableShortRangeBranches()) {
      unsigned rangeIdx = branchDeadlines_.earliestDeadlineRange();
      BufferOffset deadline = branchDeadlines_.earliestDeadline();

      // Stop tracking this branch. The Asm callback below may register
      // new branches to track.
      branchDeadlines_.removeDeadline(rangeIdx, deadline);

      // Make room for the veneer. Same as a pool guard branch.
      BufferOffset veneer = this->putBytes(guardSize_ * InstSize, nullptr);
      if (this->oom()) return;

      // Fix the branch so it targets the veneer.
      // The Asm class knows how to find the original branch given the
      // (rangeIdx, deadline) pair.
      Asm::PatchShortRangeBranchToVeneer(this, rangeIdx, deadline, veneer);
    }

    // We only reserved space for the guard branch and pool header.
    // Fill them in.
    BufferOffset afterPool = this->nextOffset();
    Asm::WritePoolGuard(guard, this->getInst(guard), afterPool);
    Asm::WritePoolHeader((uint8_t*)this->getInst(header), &pool_, false);

    // With the pool's final position determined it is now possible to patch
    // the instructions that reference entries in this pool, and this is
    // done incrementally as each pool is finished.
    size_t poolOffset = data.getOffset();

    unsigned idx = 0;
    for (BufferOffset* iter = pool_.loadOffsets.begin();
         iter != pool_.loadOffsets.end(); ++iter, ++idx) {
      // All entries should be before the pool.
      MOZ_ASSERT(iter->getOffset() < guard.getOffset());

      // Everything here is known so we can safely do the necessary
      // substitutions.
      Inst* inst = this->getInst(*iter);
      size_t codeOffset = poolOffset - iter->getOffset();

      // That is, PatchConstantPoolLoad wants to be handed the address of
      // the pool entry that is being loaded. We need to do a non-trivial
      // amount of math here, since the pool that we've made does not
      // actually reside there in memory.
      JitSpew(JitSpew_Pools, "[%d] Fixing entry %d offset to %zu", id, idx,
              codeOffset);
      Asm::PatchConstantPoolLoad(inst, (uint8_t*)inst + codeOffset);
    }

    // Record the pool info.
    unsigned firstEntry = poolEntryCount - pool_.numEntries();
    if (!poolInfo_.append(PoolInfo(firstEntry, data))) {
      this->fail_oom();
      return;
    }

    // Reset everything to the state that it was in when we started.
    pool_.reset();
  }

 public:
  void flushPool() {
    if (this->oom()) return;
    JitSpew(JitSpew_Pools, "[%d] Requesting a pool flush", id);
    finishPool();
  }

  void enterNoPool(size_t maxInst) {
    // Don't allow re-entry.
    MOZ_ASSERT(!canNotPlacePool_);
    insertNopFill();

    // Check if the pool will spill by adding maxInst instructions, and if
    // so then finish the pool before entering the no-pool region. It is
    // assumed that no pool entries are allocated in a no-pool region and
    // this is asserted when allocating entries.
    if (!hasSpaceForInsts(maxInst, 0)) {
      JitSpew(JitSpew_Pools, "[%d] No-Pool instruction(%zu) caused a spill.",
              id, sizeExcludingCurrentPool());
      finishPool();
    }

#ifdef DEBUG
    // Record the buffer position to allow validating maxInst when leaving
    // the region.
    canNotPlacePoolStartOffset_ = this->nextOffset().getOffset();
    canNotPlacePoolMaxInst_ = maxInst;
#endif

    canNotPlacePool_ = true;
  }

  void leaveNoPool() {
    MOZ_ASSERT(canNotPlacePool_);
    canNotPlacePool_ = false;

    // Validate the maxInst argument supplied to enterNoPool().
    MOZ_ASSERT(this->nextOffset().getOffset() - canNotPlacePoolStartOffset_ <=
               canNotPlacePoolMaxInst_ * InstSize);
  }
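
  // Typical use brackets a fixed-size sequence that must stay contiguous,
  // e.g. a patchable two-instruction sequence (a sketch;
  // emitTwoWordSequence() is a stand-in for whatever the backend emits):
  //
  //   buffer.enterNoPool(/* maxInst= */ 2);
  //   emitTwoWordSequence();  // No pool may be dumped between these words.
  //   buffer.leaveNoPool();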

  void align(unsigned alignment) {
    MOZ_ASSERT(mozilla::IsPowerOfTwo(alignment));
    MOZ_ASSERT(alignment >= InstSize);

    // A pool may need to be dumped at this point, so insert NOP fill here.
    insertNopFill();

    // Check if the code position can be aligned without dumping a pool.
    unsigned requiredFill = sizeExcludingCurrentPool() & (alignment - 1);
    if (requiredFill == 0) return;
    requiredFill = alignment - requiredFill;

    // Add an InstSize because it is probably not useful for a pool to be
    // dumped at the aligned code position.
    if (!hasSpaceForInsts(requiredFill / InstSize + 1, 0)) {
      // Alignment would cause a pool dump, so dump the pool now.
      JitSpew(JitSpew_Pools, "[%d] Alignment of %d at %zu caused a spill.", id,
              alignment, sizeExcludingCurrentPool());
      finishPool();
    }

    inhibitNops_ = true;
    while ((sizeExcludingCurrentPool() & (alignment - 1)) && !this->oom())
      putInt(alignFillInst_);
    inhibitNops_ = false;
  }

 public:
  void executableCopy(uint8_t* dest) {
    if (this->oom()) return;
    // The pools should have all been flushed, check.
    MOZ_ASSERT(pool_.numEntries() == 0);
    for (Slice* cur = getHead(); cur != nullptr; cur = cur->getNext()) {
      memcpy(dest, &cur->instructions[0], cur->length());
      dest += cur->length();
    }
  }

  bool appendRawCode(const uint8_t* code, size_t numBytes) {
    if (this->oom()) return false;
    // The pools should have all been flushed, check.
    MOZ_ASSERT(pool_.numEntries() == 0);
    while (numBytes > SliceSize) {
      this->putBytes(SliceSize, code);
      numBytes -= SliceSize;
      code += SliceSize;
    }
    this->putBytes(numBytes, code);
    return !this->oom();
  }

 public:
  size_t poolEntryOffset(PoolEntry pe) const {
    MOZ_ASSERT(pe.index() < poolEntryCount - pool_.numEntries(),
               "Invalid pool entry, or not flushed yet.");
    // Find the pool containing pe.index().
    // The array is sorted, so we can use a binary search.
    auto b = poolInfo_.begin(), e = poolInfo_.end();
    // A note on asymmetric types in the upper_bound comparator:
    // http://permalink.gmane.org/gmane.comp.compilers.clang.devel/10101
    auto i = std::upper_bound(b, e, pe.index(),
                              [](size_t value, const PoolInfo& entry) {
                                return value < entry.firstEntryIndex;
                              });
    // Since upper_bound finds the first pool greater than pe,
    // we want the previous one which is the last one less than or equal.
    MOZ_ASSERT(i != b, "PoolInfo not sorted or empty?");
    --i;
    // The i iterator now points to the pool containing pe.index.
    MOZ_ASSERT(i->firstEntryIndex <= pe.index() &&
               (i + 1 == e || (i + 1)->firstEntryIndex > pe.index()));
    // Compute the byte offset into the pool.
    unsigned relativeIndex = pe.index() - i->firstEntryIndex;
    return i->offset.getOffset() + relativeIndex * sizeof(PoolAllocUnit);
  }
};

}  // namespace jit
}  // namespace js

#endif  // jit_shared_IonAssemblerBufferWithConstantPools_h