1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  * vim: set ts=8 sts=4 et sw=4 tw=99:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef jit_shared_IonAssemblerBufferWithConstantPools_h
8 #define jit_shared_IonAssemblerBufferWithConstantPools_h
9 
10 #include "mozilla/MathAlgorithms.h"
11 
12 #include <algorithm>
13 
14 #include "jit/JitSpewer.h"
15 #include "jit/shared/IonAssemblerBuffer.h"
16 
17 // This code extends the AssemblerBuffer to support the pooling of values loaded
18 // using program-counter relative addressing modes. This is necessary with the
19 // ARM instruction set because it has a fixed instruction size that cannot
20 // encode all values as immediate arguments in instructions. Pooling the values
21 // allows the values to be placed in large chunks which minimizes the number of
22 // forced branches around them in the code. This is used for loading floating
23 // point constants, for loading 32 bit constants on the ARMv6, for absolute
24 // branch targets, and in future will be needed for large branches on the ARMv6.
25 //
26 // For simplicity of the implementation, the constant pools are always placed
27 // after the loads referencing them. When a new constant pool load is added to
28 // the assembler buffer, a corresponding pool entry is added to the current
29 // pending pool. The finishPool() method copies the current pending pool entries
30 // into the assembler buffer at the current offset and patches the pending
31 // constant pool load instructions.
32 //
33 // Before inserting instructions or pool entries, it is necessary to determine
34 // if doing so would place a pending pool entry out of reach of an instruction,
35 // and if so then the pool must firstly be dumped. With the allocation algorithm
36 // used below, the recalculation of all the distances between instructions and
37 // their pool entries can be avoided by noting that there will be a limiting
38 // instruction and pool entry pair that does not change when inserting more
39 // instructions. Adding more instructions makes the same increase to the
40 // distance, between instructions and their pool entries, for all such
41 // pairs. This pair is recorded as the limiter, and it is updated when new pool
42 // entries are added, see updateLimiter().
43 //
44 // The pools consist of: a guard instruction that branches around the pool, a
45 // header word that helps identify a pool in the instruction stream, and then
46 // the pool entries allocated in units of words. The guard instruction could be
47 // omitted if control does not reach the pool, and this is referred to as a
48 // natural guard below, but for simplicity the guard branch is always
49 // emitted. The pool header is an identifiable word that in combination with the
50 // guard uniquely identifies a pool in the instruction stream. The header also
51 // encodes the pool size and a flag indicating if the guard is natural. It is
52 // possible to iterate through the code instructions skipping or examining the
53 // pools. E.g. it might be necessary to skip pools when searching for, or
54 // patching, an instruction sequence.
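//
// As a rough sketch (encodings and sizes are backend-specific), a dumped pool
// looks like this in the instruction stream:
//
//   guard:     branch to afterPool    ; guardSize_ instructions
//   header:    pool header word       ; headerSize_ instructions, size + flags
//   data:      pool entry 0
//              pool entry 1
//              ...
//   afterPool: next instruction       ; also follows any branch veneers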
55 //
56 // It is often required to keep a reference to a pool entry, to patch it after
57 // the buffer is finished. Each pool entry is assigned a unique index, counting
58 // up from zero (see the poolEntryCount slot below). These can be mapped back to
59 // the offset of the pool entry in the finished buffer, see poolEntryOffset().
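//
// For example, a minimal sketch of that flow (the load instruction encoding
// and the exact allocEntry() arguments are backend-specific; the names here
// are only illustrative):
//
//   PoolEntry pe;
//   buffer.allocEntry(1, 1, (uint8_t*)&loadInst, (uint8_t*)&value, &pe);
//   // ... assemble more code; pools may be dumped along the way ...
//   size_t dataOffset = buffer.poolEntryOffset(pe);  // valid once flushed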
60 //
61 // The code supports no-pool regions, and for these the size of the region, in
62 // instructions, must be supplied. This size is used to determine if inserting
63 // the instructions would place a pool entry out of range, and if so then a pool
64 // is firstly flushed. The DEBUG code checks that the emitted code is within the
65 // supplied size to detect programming errors. See enterNoPool() and
66 // leaveNoPool().
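//
// For example, a sketch of protecting a two-instruction sequence that must not
// be split by a pool (the instruction words are illustrative):
//
//   buffer.enterNoPool(2);
//   buffer.putInt(firstInstWord);
//   buffer.putInt(secondInstWord);
//   buffer.leaveNoPool();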
67 
68 // The only planned instruction sets that require inline constant pools are the
69 // ARM, ARM64, and MIPS, and these all have fixed 32-bit sized instructions so
70 // for simplicity the code below is specialized for fixed 32-bit sized
71 // instructions and makes no attempt to support variable length
72 // instructions. The base assembler buffer which supports variable width
73 // instructions is used by the x86 and x64 backends.
74 
75 // The AssemblerBufferWithConstantPools template class uses static callbacks to
76 // the provided Asm template argument class:
77 //
78 // void Asm::InsertIndexIntoTag(uint8_t* load_, uint32_t index)
79 //
80 //   When allocEntry() is called to add a constant pool load with an associated
81 //   constant pool entry, this callback is called to encode the index of the
82 //   allocated constant pool entry into the load instruction.
83 //
84 //   After the constant pool has been placed, PatchConstantPoolLoad() is called
85 //   to update the load instruction with the right load offset.
86 //
87 // void Asm::WritePoolGuard(BufferOffset branch,
88 //                          Instruction* dest,
89 //                          BufferOffset afterPool)
90 //
91 //   Write out the constant pool guard branch before emitting the pool.
92 //
93 //   branch
94 //     Offset of the guard branch in the buffer.
95 //
96 //   dest
97 //     Pointer into the buffer where the guard branch should be emitted. (Same
98 //     as getInst(branch)). Space for guardSize_ instructions has been reserved.
99 //
100 //   afterPool
101 //     Offset of the first instruction after the constant pool. This includes
102 //     both pool entries and branch veneers added after the pool data.
103 //
104 // void Asm::WritePoolHeader(uint8_t* start, Pool* p, bool isNatural)
105 //
106 //   Write out the pool header which follows the guard branch.
107 //
108 // void Asm::PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr)
109 //
110 //   Re-encode a load of a constant pool entry after the location of the
111 //   constant pool is known.
112 //
113 //   The load instruction at loadAddr was previously passed to
114 //   InsertIndexIntoTag(). The constPoolAddr is the final address of the
115 //   constant pool in the assembler buffer.
116 //
117 // void Asm::PatchShortRangeBranchToVeneer(AssemblerBufferWithConstantPools*,
118 //                                         unsigned rangeIdx,
119 //                                         BufferOffset deadline,
120 //                                         BufferOffset veneer)
121 //
122 //   Patch a short-range branch to jump through a veneer before it goes out of
123 //   range.
124 //
125 //   rangeIdx, deadline
126 //     These arguments were previously passed to registerBranchDeadline(). It is
127 //     assumed that PatchShortRangeBranchToVeneer() knows how to compute the
128 //     offset of the short-range branch from this information.
129 //
130 //   veneer
131 //     Space for a branch veneer, guaranteed to be <= deadline. At this
132 //     position, guardSize_ * InstSize bytes are allocated. They should be
133 //     initialized to the proper unconditional branch instruction.
134 //
135 //   Unbound branches to the same unbound label are organized as a linked list:
136 //
137 //     Label::offset -> Branch1 -> Branch2 -> Branch3 -> nil
138 //
139 //   This callback should insert a new veneer branch into the list:
140 //
141 //     Label::offset -> Branch1 -> Branch2 -> Veneer -> Branch3 -> nil
142 //
143 //   When Assembler::bind() rewrites the branches with the real label offset, it
144 //   probably has to bind Branch2 to target the veneer branch instead of jumping
145 //   straight to the label.
146 
147 namespace js {
148 namespace jit {
149 
150 // BranchDeadlineSet - Keep track of pending branch deadlines.
151 //
152 // Some architectures like arm and arm64 have branch instructions with limited
153 // range. When assembling a forward branch, it is not always known if the final
154 // target label will be in range of the branch instruction.
155 //
156 // The BranchDeadlineSet data structure is used to keep track of the set of
157 // pending forward branches. It supports the following fast operations:
158 //
159 // 1. Get the earliest deadline in the set.
160 // 2. Add a new branch deadline.
161 // 3. Remove a branch deadline.
162 //
163 // Architectures may have different branch encodings with different ranges. Each
164 // supported range is assigned a small integer starting at 0. This data
165 // structure does not care about the actual range of branch instructions, just
166 // the latest buffer offset that can be reached - the deadline offset.
167 //
168 // Branches are stored as (rangeIdx, deadline) tuples. The target-specific code
169 // can compute the location of the branch itself from this information. This
170 // data structure does not need to know.
171 //
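// A minimal usage sketch (assumes a suitable LifoAlloc 'alloc'; the offsets
// below are purely illustrative):
//
//   BranchDeadlineSet<2> deadlines(alloc);
//   deadlines.addDeadline(0, BufferOffset(128));
//   deadlines.addDeadline(1, BufferOffset(4096));
//   unsigned r = deadlines.earliestDeadlineRange();   // 0
//   BufferOffset d = deadlines.earliestDeadline();    // offset 128
//   deadlines.removeDeadline(0, BufferOffset(128));   // branch was bound in time
//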
172 template <unsigned NumRanges>
173 class BranchDeadlineSet {
174   // Maintain a list of pending deadlines for each range separately.
175   //
176   // The offsets in each vector are always kept in ascending order.
177   //
178   // Because we have a separate vector for different ranges, as forward
179   // branches are added to the assembler buffer, their deadlines will
180   // always be appended to the vector corresponding to their range.
181   //
182   // When binding labels, we expect a more-or-less LIFO order of branch
183   // resolutions. This would always hold if we had strictly structured control
184   // flow.
185   //
186   // We allow branch deadlines to be added and removed in any order, but
187   // performance is best in the expected case of near LIFO order.
188   //
189   typedef Vector<BufferOffset, 8, LifoAllocPolicy<Fallible>> RangeVector;
190 
191   // We really just want "RangeVector deadline_[NumRanges];", but each vector
192   // needs to be initialized with a LifoAlloc, and C++ doesn't bend that way.
193   //
194   // Use raw aligned storage instead and explicitly construct NumRanges
195   // vectors in our constructor.
196   mozilla::AlignedStorage2<RangeVector[NumRanges]> deadlineStorage_;
197 
198   // Always access the range vectors through this method.
199   RangeVector& vectorForRange(unsigned rangeIdx) {
200     MOZ_ASSERT(rangeIdx < NumRanges, "Invalid branch range index");
201     return (*deadlineStorage_.addr())[rangeIdx];
202   }
203 
204   const RangeVector& vectorForRange(unsigned rangeIdx) const {
205     MOZ_ASSERT(rangeIdx < NumRanges, "Invalid branch range index");
206     return (*deadlineStorage_.addr())[rangeIdx];
207   }
208 
209   // Maintain a precomputed earliest deadline at all times.
210   // This is unassigned only when all deadline vectors are empty.
211   BufferOffset earliest_;
212 
213   // The range vector owning earliest_. Uninitialized when empty.
214   unsigned earliestRange_;
215 
216   // Recompute the earliest deadline after it's been invalidated.
217   void recomputeEarliest() {
218     earliest_ = BufferOffset();
219     for (unsigned r = 0; r < NumRanges; r++) {
220       auto& vec = vectorForRange(r);
221       if (!vec.empty() && (!earliest_.assigned() || vec[0] < earliest_)) {
222         earliest_ = vec[0];
223         earliestRange_ = r;
224       }
225     }
226   }
227 
228   // Update the earliest deadline if needed after inserting (rangeIdx,
229   // deadline). Always return true for convenience:
230   // return insert() && updateEarliest().
231   bool updateEarliest(unsigned rangeIdx, BufferOffset deadline) {
232     if (!earliest_.assigned() || deadline < earliest_) {
233       earliest_ = deadline;
234       earliestRange_ = rangeIdx;
235     }
236     return true;
237   }
238 
239  public:
240   explicit BranchDeadlineSet(LifoAlloc& alloc) {
241     // Manually construct vectors in the uninitialized aligned storage.
242     // This is because C++ arrays can otherwise only be constructed with
243     // the default constructor.
244     for (unsigned r = 0; r < NumRanges; r++)
245       new (&vectorForRange(r)) RangeVector(alloc);
246   }
247 
248   ~BranchDeadlineSet() {
249     // Aligned storage doesn't destruct its contents automatically.
250     for (unsigned r = 0; r < NumRanges; r++) vectorForRange(r).~RangeVector();
251   }
252 
253   // Is this set completely empty?
254   bool empty() const { return !earliest_.assigned(); }
255 
256   // Get the total number of deadlines in the set.
257   size_t size() const {
258     size_t count = 0;
259     for (unsigned r = 0; r < NumRanges; r++)
260       count += vectorForRange(r).length();
261     return count;
262   }
263 
264   // Get the number of deadlines for the range with the most elements.
265   size_t maxRangeSize() const {
266     size_t count = 0;
267     for (unsigned r = 0; r < NumRanges; r++)
268       count = std::max(count, vectorForRange(r).length());
269     return count;
270   }
271 
272   // Get the first deadline that is still in the set.
273   BufferOffset earliestDeadline() const {
274     MOZ_ASSERT(!empty());
275     return earliest_;
276   }
277 
278   // Get the range index corresponding to earliestDeadline().
279   unsigned earliestDeadlineRange() const {
280     MOZ_ASSERT(!empty());
281     return earliestRange_;
282   }
283 
284   // Add a (rangeIdx, deadline) tuple to the set.
285   //
286   // It is assumed that this tuple is not already in the set.
287   // This function performs best if the added deadline is later than any
288   // existing deadline for the same range index.
289   //
290   // Return true if the tuple was added, false if the tuple could not be added
291   // because of an OOM error.
292   bool addDeadline(unsigned rangeIdx, BufferOffset deadline) {
293     MOZ_ASSERT(deadline.assigned(), "Can only store assigned buffer offsets");
294     // This is the vector where deadline should be saved.
295     auto& vec = vectorForRange(rangeIdx);
296 
297     // Fast case: Simple append to the relevant array. This never affects
298     // the earliest deadline.
299     if (!vec.empty() && vec.back() < deadline) return vec.append(deadline);
300 
301     // Fast case: First entry to the vector. We need to update earliest_.
302     if (vec.empty())
303       return vec.append(deadline) && updateEarliest(rangeIdx, deadline);
304 
305     return addDeadlineSlow(rangeIdx, deadline);
306   }
307 
308  private:
309   // General case of addDeadline. This is split into two functions such that
310   // the common case in addDeadline can be inlined while this part probably
311   // won't inline.
312   bool addDeadlineSlow(unsigned rangeIdx, BufferOffset deadline) {
313     auto& vec = vectorForRange(rangeIdx);
314 
315     // Inserting into the middle of the vector. Use a log time binary search
316     // and a linear time insert().
317     // Is it worthwhile special-casing the empty vector?
318     auto at = std::lower_bound(vec.begin(), vec.end(), deadline);
319     MOZ_ASSERT(at == vec.end() || *at != deadline,
320                "Cannot insert duplicate deadlines");
321     return vec.insert(at, deadline) && updateEarliest(rangeIdx, deadline);
322   }
323 
324  public:
325   // Remove a deadline from the set.
326   // If (rangeIdx, deadline) is not in the set, nothing happens.
327   void removeDeadline(unsigned rangeIdx, BufferOffset deadline) {
328     auto& vec = vectorForRange(rangeIdx);
329 
330     if (vec.empty()) return;
331 
332     if (deadline == vec.back()) {
333       // Expected fast case: Structured control flow causes forward
334       // branches to be bound in reverse order.
335       vec.popBack();
336     } else {
337       // Slow case: Binary search + linear erase.
338       auto where = std::lower_bound(vec.begin(), vec.end(), deadline);
339       if (where == vec.end() || *where != deadline) return;
340       vec.erase(where);
341     }
342     if (deadline == earliest_) recomputeEarliest();
343   }
344 };
345 
346 // Specialization for architectures that don't need to track short-range
347 // branches.
348 template <>
349 class BranchDeadlineSet<0u> {
350  public:
351   explicit BranchDeadlineSet(LifoAlloc& alloc) {}
352   bool empty() const { return true; }
353   size_t size() const { return 0; }
354   size_t maxRangeSize() const { return 0; }
355   BufferOffset earliestDeadline() const { MOZ_CRASH(); }
356   unsigned earliestDeadlineRange() const { MOZ_CRASH(); }
357   bool addDeadline(unsigned rangeIdx, BufferOffset deadline) { MOZ_CRASH(); }
358   void removeDeadline(unsigned rangeIdx, BufferOffset deadline) { MOZ_CRASH(); }
359 };
360 
361 // The allocation unit size for pools.
362 typedef int32_t PoolAllocUnit;
363 
364 // Hysteresis given to short-range branches.
365 //
366 // If any short-range branches will go out of range in the next N bytes,
367 // generate a veneer for them in the current pool. The hysteresis prevents the
368 // creation of many tiny constant pools for branch veneers.
369 const size_t ShortRangeBranchHysteresis = 128;
370 
371 struct Pool {
372  private:
373   // The maximum program-counter relative offset that the instruction set can
374   // encode. Different classes of instructions might support different
375   // ranges but for simplicity the minimum is used here, and for the ARM this
376   // is constrained to 1024 by the float load instructions.
377   const size_t maxOffset_;
378   // An offset to apply to program-counter relative offsets. The ARM has a
379   // bias of 8.
380   const unsigned bias_;
381 
382   // The content of the pool entries.
383   Vector<PoolAllocUnit, 8, LifoAllocPolicy<Fallible>> poolData_;
384 
385   // Flag that tracks OOM conditions. This is set after any append failed.
386   bool oom_;
387 
388   // The limiting instruction and pool-entry pair. The program-counter relative
389   // offset of this limiting instruction will go out of range
390   // first as the pool position moves forward. It is more efficient to track
391   // just this limiting pair than to recheck all offsets when testing if the
392   // pool needs to be dumped.
393   //
394   // 1. The actual offset of the limiting instruction referencing the limiting
395   // pool entry.
396   BufferOffset limitingUser;
397   // 2. The pool entry index of the limiting pool entry.
398   unsigned limitingUsee;
399 
400  public:
401   // A record of the code offset of instructions that reference pool
402   // entries. These instructions need to be patched when the actual position
403   // of the instructions and pools are known, and for the code below this
404   // occurs when each pool is finished, see finishPool().
405   Vector<BufferOffset, 8, LifoAllocPolicy<Fallible>> loadOffsets;
406 
407   // Create a Pool. Don't allocate anything from lifoAloc, just capture its
408   // reference.
409   explicit Pool(size_t maxOffset, unsigned bias, LifoAlloc& lifoAlloc)
410       : maxOffset_(maxOffset),
411         bias_(bias),
412         poolData_(lifoAlloc),
413         oom_(false),
414         limitingUser(),
415         limitingUsee(INT_MIN),
416         loadOffsets(lifoAlloc) {}
417 
418   // If poolData() returns nullptr then oom_ will also be true.
419   const PoolAllocUnit* poolData() const { return poolData_.begin(); }
420 
421   unsigned numEntries() const { return poolData_.length(); }
422 
423   size_t getPoolSize() const { return numEntries() * sizeof(PoolAllocUnit); }
424 
425   bool oom() const { return oom_; }
426 
427   // Update the instruction/pool-entry pair that limits the position of the
428   // pool. The nextInst is the actual offset of the new instruction being
429   // allocated.
430   //
431   // This is comparing the offsets, see checkFull() below for the equation,
432   // but common expressions on both sides have been canceled from the ranges
433   // being compared. Notably, the poolOffset cancels out, so the limiting pair
434   // does not depend on where the pool is placed.
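  //
  // A sketch of the algebra: for a given (user, usee) pair, checkFull() tests
  //
  //   poolOffset + usee * sizeof(PoolAllocUnit) - (user.getOffset() + bias_) >= maxOffset_
  //
  // Since poolOffset, bias_ and maxOffset_ are the same for every pair, the
  // pair that maximizes usee * sizeof(PoolAllocUnit) - user.getOffset() goes
  // out of range first, so only that pair needs to be tracked here.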
435   void updateLimiter(BufferOffset nextInst) {
436     ptrdiff_t oldRange =
437         limitingUsee * sizeof(PoolAllocUnit) - limitingUser.getOffset();
438     ptrdiff_t newRange = getPoolSize() - nextInst.getOffset();
439     if (!limitingUser.assigned() || newRange > oldRange) {
440       // We have a new largest range!
441       limitingUser = nextInst;
442       limitingUsee = numEntries();
443     }
444   }
445 
446   // Check if inserting a pool at the actual offset poolOffset would place
447   // pool entries out of reach. This is called before inserting instructions
448   // to check that doing so would not push pool entries out of reach, and if
449   // so then the pool would need to be firstly dumped. The poolOffset is the
450   // first word of the pool, after the guard and header and alignment fill.
451   bool checkFull(size_t poolOffset) const {
452     // Not full if there are no uses.
453     if (!limitingUser.assigned()) return false;
454     size_t offset = poolOffset + limitingUsee * sizeof(PoolAllocUnit) -
455                     (limitingUser.getOffset() + bias_);
456     return offset >= maxOffset_;
457   }
458 
459   static const unsigned OOM_FAIL = unsigned(-1);
460 
461   unsigned insertEntry(unsigned num, uint8_t* data, BufferOffset off,
462                        LifoAlloc& lifoAlloc) {
463     if (oom_) return OOM_FAIL;
464     unsigned ret = numEntries();
465     if (!poolData_.append((PoolAllocUnit*)data, num) ||
466         !loadOffsets.append(off)) {
467       oom_ = true;
468       return OOM_FAIL;
469     }
470     return ret;
471   }
472 
473   void reset() {
474     poolData_.clear();
475     loadOffsets.clear();
476 
477     limitingUser = BufferOffset();
478     limitingUsee = -1;
479   }
480 };
481 
482 // Template arguments:
483 //
484 // SliceSize
485 //   Number of bytes in each allocated BufferSlice. See
486 //   AssemblerBuffer::SliceSize.
487 //
488 // InstSize
489 //   Size in bytes of the fixed-size instructions. This should be equal to
490 //   sizeof(Inst). This is only needed here because the buffer is defined before
491 //   the Instruction.
492 //
493 // Inst
494 //   The actual type used to represent instructions. This is only really used as
495 //   the return type of the getInst() method.
496 //
497 // Asm
498 //   Class defining the needed static callback functions. See documentation of
499 //   the Asm::* callbacks above.
500 //
501 // NumShortBranchRanges
502 //   The number of short branch ranges to support. This can be 0 if no support
503 //   for tracking short range branches is needed. The
504 //   AssemblerBufferWithConstantPools class does not need to know what the range
505 //   of branches is - it deals in branch 'deadlines', which are the last buffer
506 //   positions that a short-range forward branch can reach. It is assumed that
507 //   the Asm class is able to find the actual branch instruction given a
508 //   (range-index, deadline) pair.
509 //
510 //
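// For example, a backend might instantiate the buffer along these lines (the
// sizes and type names here are illustrative, not an actual backend's
// definitions):
//
//   typedef AssemblerBufferWithConstantPools<1024, 4, Instruction, Assembler, 0>
//       PooledAssemblerBuffer;
//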
511 template <size_t SliceSize, size_t InstSize, class Inst, class Asm,
512           unsigned NumShortBranchRanges = 0>
513 struct AssemblerBufferWithConstantPools
514     : public AssemblerBuffer<SliceSize, Inst> {
515  private:
516   // The PoolEntry index counter. Each PoolEntry is given a unique index,
517   // counting up from zero, and these can be mapped back to the actual pool
518   // entry offset after finishing the buffer, see poolEntryOffset().
519   size_t poolEntryCount;
520 
521  public:
522   class PoolEntry {
523     size_t index_;
524 
525    public:
526     explicit PoolEntry(size_t index) : index_(index) {}
527 
528     PoolEntry() : index_(-1) {}
529 
530     size_t index() const { return index_; }
531   };
532 
533  private:
534   typedef AssemblerBuffer<SliceSize, Inst> Parent;
535   using typename Parent::Slice;
536 
537   // The size of a pool guard, in instructions. A branch around the pool.
538   const unsigned guardSize_;
539   // The size of the header that is put at the beginning of a full pool, in
540   // instruction sized units.
541   const unsigned headerSize_;
542 
543   // The maximum pc relative offset encoded in instructions that reference
544   // pool entries. This is generally set to the maximum offset that can be
545   // encoded by the instructions, but for testing can be lowered to affect the
546   // pool placement and frequency of pool placement.
547   const size_t poolMaxOffset_;
548 
549   // The bias on pc relative addressing mode offsets, in units of bytes. The
550   // ARM has a bias of 8 bytes.
551   const unsigned pcBias_;
552 
553   // The current working pool. Copied out as needed before resetting.
554   Pool pool_;
555 
556   // The buffer should be aligned to this address.
557   const size_t instBufferAlign_;
558 
559   struct PoolInfo {
560     // The index of the first entry in this pool.
561     // Pool entries are numbered uniquely across all pools, starting from 0.
562     unsigned firstEntryIndex;
563 
564     // The location of this pool's first entry in the main assembler buffer.
565     // Note that the pool guard and header come before this offset which
566     // points directly at the data.
567     BufferOffset offset;
568 
569     explicit PoolInfo(unsigned index, BufferOffset data)
570         : firstEntryIndex(index), offset(data) {}
571   };
572 
573   // Info for each pool that has already been dumped. This does not include
574   // any entries in pool_.
575   Vector<PoolInfo, 8, LifoAllocPolicy<Fallible>> poolInfo_;
576 
577   // Set of short-range forward branches that have not yet been bound.
578   // We may need to insert veneers if the final label turns out to be out of
579   // range.
580   //
581   // This set stores (rangeIdx, deadline) pairs instead of the actual branch
582   // locations.
583   BranchDeadlineSet<NumShortBranchRanges> branchDeadlines_;
584 
585   // When true dumping pools is inhibited.
586   bool canNotPlacePool_;
587 
588 #ifdef DEBUG
589   // State for validating the 'maxInst' argument to enterNoPool().
590   // The buffer offset when entering the no-pool region.
591   size_t canNotPlacePoolStartOffset_;
592   // The maximum number of word sized instructions declared for the no-pool
593   // region.
594   size_t canNotPlacePoolMaxInst_;
595 #endif
596 
597   // Instruction to use for alignment fill.
598   const uint32_t alignFillInst_;
599 
600   // Insert a number of NOP instructions between each requested instruction at
601   // all locations at which a pool can potentially spill. This is useful for
602   // checking that instruction locations are correctly referenced and/or
603   // followed.
604   const uint32_t nopFillInst_;
605   const unsigned nopFill_;
606   // For inhibiting the insertion of fill NOPs in the dynamic context in which
607   // they are being inserted.
608   bool inhibitNops_;
609 
610  public:
611   // A unique id within each JitContext, to identify pools in the debug
612   // spew. Set by the MacroAssembler, see getNextAssemblerId().
613   int id;
614 
615  private:
616   // The buffer slices are in a double linked list.
617   Slice* getHead() const { return this->head; }
618   Slice* getTail() const { return this->tail; }
619 
620  public:
621   // Create an assembler buffer.
622   // Note that this constructor is not allowed to actually allocate memory from
623   // this->lifoAlloc_ because the MacroAssembler constructor has not yet created
624   // an AutoJitContextAlloc.
625   AssemblerBufferWithConstantPools(unsigned guardSize, unsigned headerSize,
626                                    size_t instBufferAlign, size_t poolMaxOffset,
627                                    unsigned pcBias, uint32_t alignFillInst,
628                                    uint32_t nopFillInst, unsigned nopFill = 0)
629       : poolEntryCount(0),
630         guardSize_(guardSize),
631         headerSize_(headerSize),
632         poolMaxOffset_(poolMaxOffset),
633         pcBias_(pcBias),
634         pool_(poolMaxOffset, pcBias, this->lifoAlloc_),
635         instBufferAlign_(instBufferAlign),
636         poolInfo_(this->lifoAlloc_),
637         branchDeadlines_(this->lifoAlloc_),
638         canNotPlacePool_(false),
639 #ifdef DEBUG
640         canNotPlacePoolStartOffset_(0),
641         canNotPlacePoolMaxInst_(0),
642 #endif
643         alignFillInst_(alignFillInst),
644         nopFillInst_(nopFillInst),
645         nopFill_(nopFill),
646         inhibitNops_(false),
647         id(-1) {
648   }
649 
650   // We need to wait until an AutoJitContextAlloc is created by the
651   // MacroAssembler before allocating any space.
652   void initWithAllocator() {
653     // We hand out references to lifoAlloc_ in the constructor.
654     // Check that no allocations were made then.
655     MOZ_ASSERT(this->lifoAlloc_.isEmpty(),
656                "Illegal LIFO allocations before AutoJitContextAlloc");
657   }
658 
659  private:
660   size_t sizeExcludingCurrentPool() const {
661     // Return the actual size of the buffer, excluding the current pending
662     // pool.
663     return this->nextOffset().getOffset();
664   }
665 
666  public:
667   size_t size() const {
668     // Return the current actual size of the buffer. This is only accurate
669     // if there are no pending pool entries to dump, check.
670     MOZ_ASSERT_IF(!this->oom(), pool_.numEntries() == 0);
671     return sizeExcludingCurrentPool();
672   }
673 
674  private:
675   void insertNopFill() {
676     // Insert fill for testing.
677     if (nopFill_ > 0 && !inhibitNops_ && !canNotPlacePool_) {
678       inhibitNops_ = true;
679 
680       // Fill using a branch-nop rather than a NOP so this can be
681       // distinguished and skipped.
682       for (size_t i = 0; i < nopFill_; i++) putInt(nopFillInst_);
683 
684       inhibitNops_ = false;
685     }
686   }
687 
688   static const unsigned OOM_FAIL = unsigned(-1);
689   static const unsigned DUMMY_INDEX = unsigned(-2);
690 
691   // Check if it is possible to add numInst instructions and numPoolEntries
692   // constant pool entries without needing to flush the current pool.
693   bool hasSpaceForInsts(unsigned numInsts, unsigned numPoolEntries) const {
694     size_t nextOffset = sizeExcludingCurrentPool();
695     // Earliest starting offset for the current pool after adding numInsts.
696     // This is the beginning of the pool entries proper, after inserting a
697     // guard branch + pool header.
698     size_t poolOffset =
699         nextOffset + (numInsts + guardSize_ + headerSize_) * InstSize;
700 
701     // Any constant pool loads that would go out of range?
702     if (pool_.checkFull(poolOffset)) return false;
703 
704     // Any short-range branch that would go out of range?
705     if (!branchDeadlines_.empty()) {
706       size_t deadline = branchDeadlines_.earliestDeadline().getOffset();
707       size_t poolEnd = poolOffset + pool_.getPoolSize() +
708                        numPoolEntries * sizeof(PoolAllocUnit);
709 
710       // When NumShortBranchRanges > 1, it is possible for branch deadlines to
711       // expire faster than we can insert veneers. Suppose branches are 4 bytes
712       // each, we could have the following deadline set:
713       //
714       //   Range 0: 40, 44, 48
715       //   Range 1: 44, 48
716       //
717       // It is not good enough to start inserting veneers at the 40 deadline; we
718       // would not be able to create veneers for the second 44 deadline.
719       // Instead, we need to start at 32:
720       //
721       //   32: veneer(40)
722       //   36: veneer(44)
723       //   40: veneer(44)
724       //   44: veneer(48)
725       //   48: veneer(48)
726       //
727       // This is a pretty conservative solution to the problem: If we begin at
728       // the earliest deadline, we can always emit all veneers for the range
729       // that currently has the most pending deadlines. That may not leave room
730       // for veneers for the remaining ranges, so reserve space for those
731       // secondary range veneers assuming the worst case deadlines.
732 
733       // Total pending secondary range veneer size.
734       size_t secondaryVeneers = guardSize_ * (branchDeadlines_.size() -
735                                               branchDeadlines_.maxRangeSize());
736 
737       if (deadline < poolEnd + secondaryVeneers) return false;
738     }
739 
740     return true;
741   }
742 
743   unsigned insertEntryForwards(unsigned numInst, unsigned numPoolEntries,
744                                uint8_t* inst, uint8_t* data) {
745     // If inserting pool entries then find a new limiter before we do the
746     // range check.
747     if (numPoolEntries)
748       pool_.updateLimiter(BufferOffset(sizeExcludingCurrentPool()));
749 
750     if (!hasSpaceForInsts(numInst, numPoolEntries)) {
751       if (numPoolEntries)
752         JitSpew(JitSpew_Pools, "[%d] Inserting pool entry caused a spill", id);
753       else
754         JitSpew(JitSpew_Pools, "[%d] Inserting instruction(%zu) caused a spill",
755                 id, sizeExcludingCurrentPool());
756 
757       finishPool();
758       if (this->oom()) return OOM_FAIL;
759       return insertEntryForwards(numInst, numPoolEntries, inst, data);
760     }
761     if (numPoolEntries) {
762       unsigned result = pool_.insertEntry(numPoolEntries, data,
763                                           this->nextOffset(), this->lifoAlloc_);
764       if (result == Pool::OOM_FAIL) {
765         this->fail_oom();
766         return OOM_FAIL;
767       }
768       return result;
769     }
770 
771     // The pool entry index is returned above when allocating an entry, but
772     // when not allocating an entry a dummy value is returned - it is not
773     // expected to be used by the caller.
774     return DUMMY_INDEX;
775   }
776 
777  public:
778   // Get the next buffer offset where an instruction would be inserted.
779   // This may flush the current constant pool before returning nextOffset().
780   BufferOffset nextInstrOffset() {
781     if (!hasSpaceForInsts(/* numInsts= */ 1, /* numPoolEntries= */ 0)) {
782       JitSpew(JitSpew_Pools,
783               "[%d] nextInstrOffset @ %d caused a constant pool spill", id,
784               this->nextOffset().getOffset());
785       finishPool();
786     }
787     return this->nextOffset();
788   }
789 
790   MOZ_NEVER_INLINE
791   BufferOffset allocEntry(size_t numInst, unsigned numPoolEntries,
792                           uint8_t* inst, uint8_t* data,
793                           PoolEntry* pe = nullptr) {
794     // The allocation of pool entries is not supported in a no-pool region,
795     // check.
796     MOZ_ASSERT_IF(numPoolEntries, !canNotPlacePool_);
797 
798     if (this->oom() && !this->bail()) return BufferOffset();
799 
800     insertNopFill();
801 
802 #ifdef JS_JITSPEW
803     if (numPoolEntries && JitSpewEnabled(JitSpew_Pools)) {
804       JitSpew(JitSpew_Pools, "[%d] Inserting %d entries into pool", id,
805               numPoolEntries);
806       JitSpewStart(JitSpew_Pools, "[%d] data is: 0x", id);
807       size_t length = numPoolEntries * sizeof(PoolAllocUnit);
808       for (unsigned idx = 0; idx < length; idx++) {
809         JitSpewCont(JitSpew_Pools, "%02x", data[length - idx - 1]);
810         if (((idx & 3) == 3) && (idx + 1 != length))
811           JitSpewCont(JitSpew_Pools, "_");
812       }
813       JitSpewFin(JitSpew_Pools);
814     }
815 #endif
816 
817     // Insert the pool value.
818     unsigned index = insertEntryForwards(numInst, numPoolEntries, inst, data);
819     if (this->oom()) return BufferOffset();
820 
821     // Now to get an instruction to write.
822     PoolEntry retPE;
823     if (numPoolEntries) {
824       JitSpew(JitSpew_Pools, "[%d] Entry has index %u, offset %zu", id, index,
825               sizeExcludingCurrentPool());
826       Asm::InsertIndexIntoTag(inst, index);
827       // Figure out the offset within the pool entries.
828       retPE = PoolEntry(poolEntryCount);
829       poolEntryCount += numPoolEntries;
830     }
831     // Now inst is a valid thing to insert into the instruction stream.
832     if (pe != nullptr) *pe = retPE;
833     return this->putBytes(numInst * InstSize, inst);
834   }
835 
836   // putInt is the workhorse for the assembler and higher-level buffer
837   // abstractions: it places one instruction into the instruction stream.
838   // Under normal circumstances putInt should just check that the constant
839   // pool does not need to be flushed, that there is space for the single word
840   // of the instruction, and write that word and update the buffer pointer.
841   //
842   // To do better here we need a status variable that handles both nopFill_
843   // and capacity, so that we can quickly know whether to go the slow path.
844   // That could be a variable that has the remaining number of simple
845   // instructions that can be inserted before a more expensive check,
846   // which is set to zero when nopFill_ is set.
847   //
848   // We assume that we don't have to check this->oom() if there is space to
849   // insert a plain instruction; there will always come a later time when it
850   // will be checked anyway.
851 
852   MOZ_ALWAYS_INLINE
853   BufferOffset putInt(uint32_t value) {
854     if (nopFill_ ||
855         !hasSpaceForInsts(/* numInsts= */ 1, /* numPoolEntries= */ 0))
856       return allocEntry(1, 0, (uint8_t*)&value, nullptr, nullptr);
857 
858 #if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
859     defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
860     return this->putU32Aligned(value);
861 #else
862     return this->AssemblerBuffer<SliceSize, Inst>::putInt(value);
863 #endif
864   }
865 
866   // Register a short-range branch deadline.
867   //
868   // After inserting a short-range forward branch, call this method to
869   // register the branch 'deadline' which is the last buffer offset that the
870   // branch instruction can reach.
871   //
872   // When the branch is bound to a destination label, call
873   // unregisterBranchDeadline() to stop tracking this branch.
874   //
875   // If the assembled code is about to exceed the registered branch deadline,
876   // and unregisterBranchDeadline() has not yet been called, an
877   // instruction-sized constant pool entry is allocated before the branch
878   // deadline.
879   //
880   // rangeIdx
881   //   A number < NumShortBranchRanges identifying the range of the branch.
882   //
883   // deadline
884   //   The highest buffer offset that the short-range branch can reach
885   //   directly.
886   //
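  // For example (illustrative only; the real range indices and distances are
  // defined by the backend), after emitting a short branch at offset 'b' that
  // can reach at most 32 KB forward, one might call:
  //
  //   registerBranchDeadline(rangeIdx, BufferOffset(b.getOffset() + 32 * 1024));
  //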
887   void registerBranchDeadline(unsigned rangeIdx, BufferOffset deadline) {
888     if (!this->oom() && !branchDeadlines_.addDeadline(rangeIdx, deadline))
889       this->fail_oom();
890   }
891 
892   // Un-register a short-range branch deadline.
893   //
894   // When a short-range branch has been successfully bound to its destination
895   // label, call this function to stop tracking the branch.
896   //
897   // The (rangeIdx, deadline) pair must be previously registered.
898   //
899   void unregisterBranchDeadline(unsigned rangeIdx, BufferOffset deadline) {
900     if (!this->oom()) branchDeadlines_.removeDeadline(rangeIdx, deadline);
901   }
902 
903  private:
904   // Are any short-range branches about to expire?
905   bool hasExpirableShortRangeBranches() const {
906     if (branchDeadlines_.empty()) return false;
907 
908     // Include branches that would expire in the next N bytes.
909     // The hysteresis avoids the needless creation of many tiny constant
910     // pools.
911     return this->nextOffset().getOffset() + ShortRangeBranchHysteresis >
912            size_t(branchDeadlines_.earliestDeadline().getOffset());
913   }
914 
915   void finishPool() {
916     JitSpew(JitSpew_Pools,
917             "[%d] Attempting to finish pool %zu with %u entries.", id,
918             poolInfo_.length(), pool_.numEntries());
919 
920     if (pool_.numEntries() == 0 && !hasExpirableShortRangeBranches()) {
921       // If there is no data in the pool being dumped, don't dump anything.
922       JitSpew(JitSpew_Pools, "[%d] Aborting because the pool is empty", id);
923       return;
924     }
925 
926     // Should not be placing a pool in a no-pool region, check.
927     MOZ_ASSERT(!canNotPlacePool_);
928 
929     // Dump the pool with a guard branch around the pool.
930     BufferOffset guard = this->putBytes(guardSize_ * InstSize, nullptr);
931     BufferOffset header = this->putBytes(headerSize_ * InstSize, nullptr);
932     BufferOffset data = this->putBytesLarge(pool_.getPoolSize(),
933                                             (const uint8_t*)pool_.poolData());
934     if (this->oom()) return;
935 
936     // Now generate branch veneers for any short-range branches that are
937     // about to expire.
938     while (hasExpirableShortRangeBranches()) {
939       unsigned rangeIdx = branchDeadlines_.earliestDeadlineRange();
940       BufferOffset deadline = branchDeadlines_.earliestDeadline();
941 
942       // Stop tracking this branch. The Asm callback below may register
943       // new branches to track.
944       branchDeadlines_.removeDeadline(rangeIdx, deadline);
945 
946       // Make room for the veneer. Same as a pool guard branch.
947       BufferOffset veneer = this->putBytes(guardSize_ * InstSize, nullptr);
948       if (this->oom()) return;
949 
950       // Fix the branch so it targets the veneer.
951       // The Asm class knows how to find the original branch given the
952       // (rangeIdx, deadline) pair.
953       Asm::PatchShortRangeBranchToVeneer(this, rangeIdx, deadline, veneer);
954     }
955 
956     // We only reserved space for the guard branch and pool header.
957     // Fill them in.
958     BufferOffset afterPool = this->nextOffset();
959     Asm::WritePoolGuard(guard, this->getInst(guard), afterPool);
960     Asm::WritePoolHeader((uint8_t*)this->getInst(header), &pool_, false);
961 
962     // With the pool's final position determined it is now possible to patch
963     // the instructions that reference entries in this pool, and this is
964     // done incrementally as each pool is finished.
965     size_t poolOffset = data.getOffset();
966 
967     unsigned idx = 0;
968     for (BufferOffset *iter = pool_.loadOffsets.begin();
969          iter != pool_.loadOffsets.end(); ++iter, ++idx) {
970       // All entries should be before the pool.
971       MOZ_ASSERT(iter->getOffset() < guard.getOffset());
972 
973       // Everything here is known so we can safely do the necessary
974       // substitutions.
975       Inst* inst = this->getInst(*iter);
976       size_t codeOffset = poolOffset - iter->getOffset();
977 
978       // That is, PatchConstantPoolLoad wants to be handed the address of
979       // the pool entry that is being loaded.  We need to do a non-trivial
980       // amount of math here, since the pool that we've made does not
981       // actually reside there in memory.
982       JitSpew(JitSpew_Pools, "[%d] Fixing entry %d offset to %zu", id, idx,
983               codeOffset);
984       Asm::PatchConstantPoolLoad(inst, (uint8_t*)inst + codeOffset);
985     }
986 
987     // Record the pool info.
988     unsigned firstEntry = poolEntryCount - pool_.numEntries();
989     if (!poolInfo_.append(PoolInfo(firstEntry, data))) {
990       this->fail_oom();
991       return;
992     }
993 
994     // Reset everything to the state that it was in when we started.
995     pool_.reset();
996   }
997 
998  public:
999   void flushPool() {
1000     if (this->oom()) return;
1001     JitSpew(JitSpew_Pools, "[%d] Requesting a pool flush", id);
1002     finishPool();
1003   }
1004 
1005   void enterNoPool(size_t maxInst) {
1006     // Don't allow re-entry.
1007     MOZ_ASSERT(!canNotPlacePool_);
1008     insertNopFill();
1009 
1010     // Check if the pool will spill by adding maxInst instructions, and if
1011     // so then finish the pool before entering the no-pool region. It is
1012     // assumed that no pool entries are allocated in a no-pool region and
1013     // this is asserted when allocating entries.
1014     if (!hasSpaceForInsts(maxInst, 0)) {
1015       JitSpew(JitSpew_Pools, "[%d] No-Pool instruction(%zu) caused a spill.",
1016               id, sizeExcludingCurrentPool());
1017       finishPool();
1018     }
1019 
1020 #ifdef DEBUG
1021     // Record the buffer position to allow validating maxInst when leaving
1022     // the region.
1023     canNotPlacePoolStartOffset_ = this->nextOffset().getOffset();
1024     canNotPlacePoolMaxInst_ = maxInst;
1025 #endif
1026 
1027     canNotPlacePool_ = true;
1028   }
1029 
1030   void leaveNoPool() {
1031     MOZ_ASSERT(canNotPlacePool_);
1032     canNotPlacePool_ = false;
1033 
1034     // Validate the maxInst argument supplied to enterNoPool().
1035     MOZ_ASSERT(this->nextOffset().getOffset() - canNotPlacePoolStartOffset_ <=
1036                canNotPlacePoolMaxInst_ * InstSize);
1037   }
1038 
1039   void align(unsigned alignment) {
1040     MOZ_ASSERT(mozilla::IsPowerOfTwo(alignment));
1041     MOZ_ASSERT(alignment >= InstSize);
1042 
1043     // A pool may need to be dumped at this point, so insert NOP fill here.
1044     insertNopFill();
1045 
1046     // Check if the code position can be aligned without dumping a pool.
1047     unsigned requiredFill = sizeExcludingCurrentPool() & (alignment - 1);
1048     if (requiredFill == 0) return;
1049     requiredFill = alignment - requiredFill;
1050 
1051     // Add an InstSize because it is probably not useful for a pool to be
1052     // dumped at the aligned code position.
1053     if (!hasSpaceForInsts(requiredFill / InstSize + 1, 0)) {
1054       // Alignment would cause a pool dump, so dump the pool now.
1055       JitSpew(JitSpew_Pools, "[%d] Alignment of %d at %zu caused a spill.", id,
1056               alignment, sizeExcludingCurrentPool());
1057       finishPool();
1058     }
1059 
1060     inhibitNops_ = true;
1061     while ((sizeExcludingCurrentPool() & (alignment - 1)) && !this->oom())
1062       putInt(alignFillInst_);
1063     inhibitNops_ = false;
1064   }
1065 
1066  public:
1067   void executableCopy(uint8_t* dest) {
1068     if (this->oom()) return;
1069     // The pools should have all been flushed, check.
1070     MOZ_ASSERT(pool_.numEntries() == 0);
1071     for (Slice* cur = getHead(); cur != nullptr; cur = cur->getNext()) {
1072       memcpy(dest, &cur->instructions[0], cur->length());
1073       dest += cur->length();
1074     }
1075   }
1076 
1077   bool appendRawCode(const uint8_t* code, size_t numBytes) {
1078     if (this->oom()) return false;
1079     // The pools should have all been flushed, check.
1080     MOZ_ASSERT(pool_.numEntries() == 0);
1081     while (numBytes > SliceSize) {
1082       this->putBytes(SliceSize, code);
1083       numBytes -= SliceSize;
1084       code += SliceSize;
1085     }
1086     this->putBytes(numBytes, code);
1087     return !this->oom();
1088   }
1089 
1090  public:
1091   size_t poolEntryOffset(PoolEntry pe) const {
1092     MOZ_ASSERT(pe.index() < poolEntryCount - pool_.numEntries(),
1093                "Invalid pool entry, or not flushed yet.");
1094     // Find the pool containing pe.index().
1095     // The array is sorted, so we can use a binary search.
1096     auto b = poolInfo_.begin(), e = poolInfo_.end();
1097     // A note on asymmetric types in the upper_bound comparator:
1098     // http://permalink.gmane.org/gmane.comp.compilers.clang.devel/10101
1099     auto i = std::upper_bound(b, e, pe.index(),
1100                               [](size_t value, const PoolInfo& entry) {
1101                                 return value < entry.firstEntryIndex;
1102                               });
1103     // Since upper_bound finds the first pool greater than pe,
1104     // we want the previous one which is the last one less than or equal.
1105     MOZ_ASSERT(i != b, "PoolInfo not sorted or empty?");
1106     --i;
1107     // The i iterator now points to the pool containing pe.index.
1108     MOZ_ASSERT(i->firstEntryIndex <= pe.index() &&
1109                (i + 1 == e || (i + 1)->firstEntryIndex > pe.index()));
1110     // Compute the byte offset into the pool.
1111     unsigned relativeIndex = pe.index() - i->firstEntryIndex;
1112     return i->offset.getOffset() + relativeIndex * sizeof(PoolAllocUnit);
1113   }
1114 };
1115 
1116 }  // namespace jit
1117 }  // namespace js
1118 
1119 #endif  // jit_shared_IonAssemblerBufferWithConstantPools_h
1120