1 //! Main file / top-level module for regalloc library.
2 //!
3 //! We have tried hard to make the library's interface as simple as possible,
4 //! yet flexible enough that the allocators it implements can provide good
5 //! quality allocations in reasonable time.  Nevertheless, there is still
6 //! significant semantic complexity in parts of the interface.  If you intend
7 //! to use this library in your own code, you would be well advised to read
8 //! the comments in this file very carefully.
9 
10 // Make the analysis module public for fuzzing.
11 #[cfg(feature = "fuzzing")]
12 pub mod analysis_main;
13 #[cfg(not(feature = "fuzzing"))]
14 mod analysis_main;
15 
16 mod analysis_control_flow;
17 mod analysis_data_flow;
18 mod avl_tree;
19 mod bt_coalescing_analysis;
20 mod bt_commitment_map;
21 mod bt_main;
22 mod bt_spillslot_allocator;
23 mod bt_vlr_priority_queue;
24 mod checker;
25 mod data_structures;
26 mod inst_stream;
27 mod linear_scan;
28 mod reg_maps;
29 mod sparse_set;
30 mod union_find;
31 
32 use log::{info, log_enabled, Level};
33 use std::default;
34 use std::{borrow::Cow, fmt};
35 
36 // Stuff that is defined by the library
37 
38 // Sets and maps of things.  We can refine these later; but for now the
39 // interface needs some way to speak about them, so let's use the
40 // library-provided versions.
41 
42 pub use crate::data_structures::Map;
43 pub use crate::data_structures::Set;
44 
45 // Register classes
46 
47 pub use crate::data_structures::RegClass;
48 
49 // Registers, both real and virtual, and ways to create them
50 
51 pub use crate::data_structures::Reg;
52 
53 pub use crate::data_structures::RealReg;
54 pub use crate::data_structures::VirtualReg;
55 
56 pub use crate::data_structures::Writable;
57 
58 pub use crate::data_structures::NUM_REG_CLASSES;
59 
60 // Spill slots
61 
62 pub use crate::data_structures::SpillSlot;
63 
64 // The "register universe".  This describes the registers available to the
65 // allocator.  There are very strict requirements on the structure of the
66 // universe.  If you fail to observe these requirements, either the allocator
67 // itself, or the resulting code, will fail in mysterious ways, and your life
68 // will be miserable while you try to figure out what happened.  There are
69 // lower level details on the definition of RealRegUniverse which you also
70 // need to take note of.  The overall contract is as follows.
71 //
72 // === (1) === Basic structure ===
73 //
74 // A "register universe" is a read-only structure that contains all
75 // information about real registers on a given host.  For each register class
76 // (RegClass) supported by the target, the universe must provide a vector of
77 // registers that the allocator may use.
78 //
79 // The universe may also list other registers that the incoming
80 // virtual-registerised code may use, but which are not available for use by
81 // the allocator.  Indeed, the universe *must* list *all* registers that will
82 // ever be mentioned in the incoming code.  Failure to do so will cause the
83 // allocator's analysis phase to return an error.
84 //
85 // === (2) === Ordering of registers within each class
86 //
87 // The ordering of available registers within these vectors does not affect
88 // the correctness of the final allocation.  However, it will affect the
89 // quality of final allocation.  Clients are recommended to list, for each
90 // class, the callee-saved registers first, and the caller-saved registers
91 // after that.  The currently supported allocation algorithms (Backtracking
92 // and LinearScan) will try to use the first available registers in each
93 // class, that is to say, callee-saved ones first.  The purpose of this is to
94 // try and minimise spilling around calls by avoiding use of caller-saved ones
95 // if possible.
96 //
97 // There is a twist here, however.  The abovementioned heuristic works well
98 // for non-leaf functions (functions that contain at least one call).  But for
99 // leaf functions, we would prefer to use the caller-saved registers first,
100 // since doing so has potential to minimise the number of registers that must
101 // be saved/restored in the prologue and epilogue.  Presently there is no way
102 // to tell this interface that the function is a leaf function, and so the
103 // only way to get optimal code in this case is to present a universe with the
104 // registers listed in the opposite order.
105 //
106 // This is of course inconvenient for the caller, since it requires
107 // maintenance of two separate universes.  In the future we will add a boolean
108 // parameter to the top level function `allocate_registers` that indicates
109 // that whether or not the function is a leaf function.
110 //
111 // === (3) === The "suggested scratch register" ===
112 //
113 // Some allocation algorithms, particularly linear-scan, may need to have a
114 // scratch register available for their own use.  The register universe must
115 // nominate a scratch register in each class, specified in
116 // RealRegUniverse::allocable_by_class[..]::Some(suggested_scratch).  The
117 // choice of scratch register is influenced by the architecture, the ABI, and
118 // client-side fixed-use register conventions.  There rules are as follows:
119 //
120 // (1) For each class, the universe must offer a reserved register.
121 //
122 // (2) The reserved register may not have any implied-by-the architecture
123 //     reads/modifies/writes for any instruction in the vcode.  Unfortunately
124 //     there is no easy way for this library to check that.
125 //
126 // (3) The reserved register must not have any reads or modifies by any
127 //     instruction in the vcode.  In other words, it must not be handed to
128 //     either the `add_use` or `add_mod` function of the `RegUsageCollector`
129 //     that is presented to the client's `get_regs` function.  If any such
130 //     mention is detected, the library will return an error.
131 //
132 // (4) The reserved reg may be mentioned as written by instructions in the
133 //     vcode, though -- in other words it may be handed to `add_def`.  The
134 //     library will tolerate and correctly handle that.  However, because no
135 //     vcode instruction may read or modify the reserved register, all such
136 //     writes are "dead".  This in turn guarantees that the allocator can, if
137 //     it wants, change the value in it at any time, without changing the
138 //     behaviour of the final generated code.
139 //
140 // Currently, the LinearScan algorithm may use the reserved registers.  The
141 // Backtracking algorithm will ignore the hints and treat them as "normal"
142 // allocatable registers.
143 
144 pub use crate::data_structures::RealRegUniverse;
145 pub use crate::data_structures::RegClassInfo;
146 
147 // A structure for collecting information about which registers each
148 // instruction uses.
149 
150 pub use crate::data_structures::RegUsageCollector;
151 
152 /// A trait for providing mapping results for a given instruction.
153 ///
154 /// This provides virtual to real register mappings for every mention in an instruction: use, mod
155 /// or def. The main purpose of this trait is to be used when re-writing the instruction stream
156 /// after register allocation happened; see also `Function::map_regs`.
157 pub trait RegUsageMapper: fmt::Debug {
158     /// Return the `RealReg` if mapped, or `None`, for `vreg` occuring as a use
159     /// on the current instruction.
get_use(&self, vreg: VirtualReg) -> Option<RealReg>160     fn get_use(&self, vreg: VirtualReg) -> Option<RealReg>;
161 
162     /// Return the `RealReg` if mapped, or `None`, for `vreg` occuring as a def
163     /// on the current instruction.
get_def(&self, vreg: VirtualReg) -> Option<RealReg>164     fn get_def(&self, vreg: VirtualReg) -> Option<RealReg>;
165 
166     /// Return the `RealReg` if mapped, or `None`, for a `vreg` occuring as a
167     /// mod on the current instruction.
get_mod(&self, vreg: VirtualReg) -> Option<RealReg>168     fn get_mod(&self, vreg: VirtualReg) -> Option<RealReg>;
169 }
170 
171 // TypedIxVector, so that the interface can speak about vectors of blocks and
172 // instructions.
173 
174 pub use crate::data_structures::TypedIxVec;
175 pub use crate::data_structures::{BlockIx, InstIx, Range};
176 
177 /// A trait defined by the regalloc client to provide access to its
178 /// machine-instruction / CFG representation.
179 pub trait Function {
180     /// Regalloc is parameterized on F: Function and so can use the projected
181     /// type F::Inst.
182     type Inst: Clone + fmt::Debug;
183 
184     // -------------
185     // CFG traversal
186     // -------------
187 
188     /// Allow access to the underlying vector of instructions.
insns(&self) -> &[Self::Inst]189     fn insns(&self) -> &[Self::Inst];
190 
191     /// Get all instruction indices as an iterable range.
insn_indices(&self) -> Range<InstIx>192     fn insn_indices(&self) -> Range<InstIx> {
193         Range::new(InstIx::new(0), self.insns().len())
194     }
195 
196     /// Allow mutable access to the underlying vector of instructions.
insns_mut(&mut self) -> &mut [Self::Inst]197     fn insns_mut(&mut self) -> &mut [Self::Inst];
198 
199     /// Get an instruction with a type-safe InstIx index.
get_insn(&self, insn: InstIx) -> &Self::Inst200     fn get_insn(&self, insn: InstIx) -> &Self::Inst;
201 
202     /// Get a mutable borrow of an instruction with the given type-safe InstIx
203     /// index.
get_insn_mut(&mut self, insn: InstIx) -> &mut Self::Inst204     fn get_insn_mut(&mut self, insn: InstIx) -> &mut Self::Inst;
205 
206     /// Allow iteration over basic blocks (in instruction order).
blocks(&self) -> Range<BlockIx>207     fn blocks(&self) -> Range<BlockIx>;
208 
209     /// Get the index of the entry block.
entry_block(&self) -> BlockIx210     fn entry_block(&self) -> BlockIx;
211 
212     /// Provide the range of instruction indices contained in each block.
block_insns(&self, block: BlockIx) -> Range<InstIx>213     fn block_insns(&self, block: BlockIx) -> Range<InstIx>;
214 
215     /// Get CFG successors for a given block.
block_succs(&self, block: BlockIx) -> Cow<[BlockIx]>216     fn block_succs(&self, block: BlockIx) -> Cow<[BlockIx]>;
217 
218     /// Determine whether an instruction is a return instruction.
is_ret(&self, insn: InstIx) -> bool219     fn is_ret(&self, insn: InstIx) -> bool;
220 
221     // --------------------------
222     // Instruction register slots
223     // --------------------------
224 
225     /// Add to `collector` the used, defined, and modified registers for an
226     /// instruction.
get_regs(insn: &Self::Inst, collector: &mut RegUsageCollector)227     fn get_regs(insn: &Self::Inst, collector: &mut RegUsageCollector);
228 
229     /// Map each register slot through a virtual-to-real mapping indexed
230     /// by virtual register. The two separate maps in `maps.pre` and
231     /// `maps.post` provide the mapping to use for uses (which semantically
232     /// occur just prior to the instruction's effect) and defs (which
233     /// semantically occur just after the instruction's effect). Regs that were
234     /// "modified" can use either map; the vreg should be the same in both.
235     ///
236     /// Note that this does not take a `self`, because we want to allow the
237     /// regalloc to have a mutable borrow of an insn (which borrows the whole
238     /// Function in turn) outstanding while calling this.
map_regs<RUM: RegUsageMapper>(insn: &mut Self::Inst, maps: &RUM)239     fn map_regs<RUM: RegUsageMapper>(insn: &mut Self::Inst, maps: &RUM);
240 
241     /// Allow the regalloc to query whether this is a move. Returns (dst, src).
is_move(&self, insn: &Self::Inst) -> Option<(Writable<Reg>, Reg)>242     fn is_move(&self, insn: &Self::Inst) -> Option<(Writable<Reg>, Reg)>;
243 
244     /// Get an estimate of how many `VirtualReg` indices are used, if available, to allow
245     /// preallocating data structures.
get_vreg_count_estimate(&self) -> Option<usize>246     fn get_vreg_count_estimate(&self) -> Option<usize> {
247         None
248     }
249 
250     // --------------
251     // Spills/reloads
252     // --------------
253 
254     /// How many logical spill slots does the given regclass require?  E.g., on a
255     /// 64-bit machine, spill slots may nominally be 64-bit words, but a 128-bit
256     /// vector value will require two slots.  The regalloc will always align on
257     /// this size.
258     ///
259     /// This passes the associated virtual register to the client as well,
260     /// because the way in which we spill a real register may depend on the
261     /// value that we are using it for. E.g., if a machine has V128 registers
262     /// but we also use them for F32 and F64 values, we may use a different
263     /// store-slot size and smaller-operand store/load instructions for an F64
264     /// than for a true V128.
get_spillslot_size(&self, regclass: RegClass, for_vreg: VirtualReg) -> u32265     fn get_spillslot_size(&self, regclass: RegClass, for_vreg: VirtualReg) -> u32;
266 
267     /// Generate a spill instruction for insertion into the instruction
268     /// sequence. The associated virtual register (whose value is being spilled)
269     /// is passed so that the client may make decisions about the instruction to
270     /// generate based on the type of value in question.  Because the register
271     /// allocator will insert spill instructions at arbitrary points, the
272     /// returned instruction here must not modify the machine's condition codes.
gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, for_vreg: VirtualReg) -> Self::Inst273     fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, for_vreg: VirtualReg) -> Self::Inst;
274 
275     /// Generate a reload instruction for insertion into the instruction
276     /// sequence. The associated virtual register (whose value is being loaded)
277     /// is passed as well.  The returned instruction must not modify the
278     /// machine's condition codes.
gen_reload( &self, to_reg: Writable<RealReg>, from_slot: SpillSlot, for_vreg: VirtualReg, ) -> Self::Inst279     fn gen_reload(
280         &self,
281         to_reg: Writable<RealReg>,
282         from_slot: SpillSlot,
283         for_vreg: VirtualReg,
284     ) -> Self::Inst;
285 
286     /// Generate a register-to-register move for insertion into the instruction
287     /// sequence. The associated virtual register is passed as well.  The
288     /// returned instruction must not modify the machine's condition codes.
gen_move( &self, to_reg: Writable<RealReg>, from_reg: RealReg, for_vreg: VirtualReg, ) -> Self::Inst289     fn gen_move(
290         &self,
291         to_reg: Writable<RealReg>,
292         from_reg: RealReg,
293         for_vreg: VirtualReg,
294     ) -> Self::Inst;
295 
296     /// Generate an instruction which is a no-op and has zero length.
gen_zero_len_nop(&self) -> Self::Inst297     fn gen_zero_len_nop(&self) -> Self::Inst;
298 
299     /// Try to alter an existing instruction to use a value directly in a
300     /// spillslot (accessing memory directly) instead of the given register. May
301     /// be useful on ISAs that have mem/reg ops, like x86.
302     ///
303     /// Note that this is not *quite* just fusing a load with the op; if the
304     /// value is def'd or modified, it should be written back to the spill slot
305     /// as well. In other words, it is just using the spillslot as if it were a
306     /// real register, for reads and/or writes.
307     ///
308     /// FIXME JRS 2020Feb06: state precisely the constraints on condition code
309     /// changes.
maybe_direct_reload( &self, insn: &Self::Inst, reg: VirtualReg, slot: SpillSlot, ) -> Option<Self::Inst>310     fn maybe_direct_reload(
311         &self,
312         insn: &Self::Inst,
313         reg: VirtualReg,
314         slot: SpillSlot,
315     ) -> Option<Self::Inst>;
316 
317     // ----------------------------------------------------------
318     // Function liveins, liveouts, and direct-mode real registers
319     // ----------------------------------------------------------
320 
321     /// Return the set of registers that should be considered live at the
322     /// beginning of the function. This is semantically equivalent to an
323     /// instruction at the top of the entry block def'ing all registers in this
324     /// set.
func_liveins(&self) -> Set<RealReg>325     fn func_liveins(&self) -> Set<RealReg>;
326 
327     /// Return the set of registers that should be considered live at the
328     /// end of the function (after every return instruction). This is
329     /// semantically equivalent to an instruction at each block with no successors
330     /// that uses each of these registers.
func_liveouts(&self) -> Set<RealReg>331     fn func_liveouts(&self) -> Set<RealReg>;
332 }
333 
334 /// The result of register allocation.  Note that allocation can fail!
335 pub struct RegAllocResult<F: Function> {
336     /// A new sequence of instructions with all register slots filled with real
337     /// registers, and spills/fills/moves possibly inserted (and unneeded moves
338     /// elided).
339     pub insns: Vec<F::Inst>,
340 
341     /// Basic-block start indices for the new instruction list, indexed by the
342     /// original basic block indices. May be used by the client to, e.g., remap
343     /// branch targets appropriately.
344     pub target_map: TypedIxVec<BlockIx, InstIx>,
345 
346     /// Full mapping from new instruction indices to original instruction
347     /// indices. May be needed by the client to, for example, update metadata
348     /// such as debug/source-location info as the instructions are spliced
349     /// and reordered.
350     ///
351     /// Each entry is an `InstIx`, but may be `InstIx::invalid_value()` if the
352     /// new instruction at this new index was inserted by the allocator
353     /// (i.e., if it is a load, spill or move instruction).
354     pub orig_insn_map: TypedIxVec</* new */ InstIx, /* orig */ InstIx>,
355 
356     /// Which real registers were overwritten? This will contain all real regs
357     /// that appear as defs or modifies in register slots of the output
358     /// instruction list.  This will only list registers that are available to
359     /// the allocator.  If one of the instructions clobbers a register which
360     /// isn't available to the allocator, it won't be mentioned here.
361     pub clobbered_registers: Set<RealReg>,
362 
363     /// How many spill slots were used?
364     pub num_spill_slots: u32,
365 
366     /// Block annotation strings, for debugging.  Requires requesting in the
367     /// call to `allocate_registers`.  Creating of these annotations is
368     /// potentially expensive, so don't request them if you don't need them.
369     pub block_annotations: Option<TypedIxVec<BlockIx, Vec<String>>>,
370 }
371 
372 /// A choice of register allocation algorithm to run.
373 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
374 pub enum AlgorithmWithDefaults {
375     Backtracking,
376     LinearScan,
377 }
378 
379 pub use crate::analysis_main::AnalysisError;
380 pub use crate::checker::{CheckerError, CheckerErrors};
381 
382 /// An error from the register allocator.
383 #[derive(Clone, Debug)]
384 pub enum RegAllocError {
385     OutOfRegisters(RegClass),
386     MissingSuggestedScratchReg(RegClass),
387     Analysis(AnalysisError),
388     RegChecker(CheckerErrors),
389     Other(String),
390 }
391 
392 impl fmt::Display for RegAllocError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result393     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
394         write!(f, "{:?}", self)
395     }
396 }
397 
398 pub use crate::bt_main::BacktrackingOptions;
399 pub use crate::linear_scan::LinearScanOptions;
400 
401 #[derive(Clone)]
402 pub enum Algorithm {
403     LinearScan(LinearScanOptions),
404     Backtracking(BacktrackingOptions),
405 }
406 
407 impl fmt::Debug for Algorithm {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result408     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
409         match self {
410             Algorithm::LinearScan(opts) => write!(fmt, "{:?}", opts),
411             Algorithm::Backtracking(opts) => write!(fmt, "{:?}", opts),
412         }
413     }
414 }
415 
416 /// Tweakable options shared by all the allocators.
417 #[derive(Clone)]
418 pub struct Options {
419     /// Should the register allocator check that its results are valid? This adds runtime to the
420     /// compiler, so this is disabled by default.
421     pub run_checker: bool,
422 
423     /// Which algorithm should be used for register allocation? By default, selects backtracking,
424     /// which is slower to compile but creates code of better quality.
425     pub algorithm: Algorithm,
426 }
427 
428 impl default::Default for Options {
default() -> Self429     fn default() -> Self {
430         Self {
431             run_checker: false,
432             algorithm: Algorithm::Backtracking(Default::default()),
433         }
434     }
435 }
436 
437 impl fmt::Debug for Options {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result438     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
439         write!(
440             f,
441             "checker: {:?}, algorithm: {:?}",
442             self.run_checker, self.algorithm
443         )
444     }
445 }
446 
447 /// Allocate registers for a function's code, given a universe of real registers that we are
448 /// allowed to use.
449 ///
450 /// The control flow graph must not contain any critical edges, that is, any edge coming from a
451 /// block with multiple successors must not flow into a block with multiple predecessors. The
452 /// embedder must have split critical edges before handing over the function to this function.
453 /// Otherwise, an error will be returned.
454 ///
455 /// Allocate may succeed, returning a `RegAllocResult` with the new instruction sequence, or it may
456 /// fail, returning an error.
457 ///
458 /// Runtime options can be passed to the allocators, through the use of [Options] for options
459 /// common to all the backends. The choice of algorithm is done by passing a given [Algorithm]
460 /// instance, with options tailored for each algorithm.
461 #[inline(never)]
allocate_registers_with_opts<F: Function>( func: &mut F, rreg_universe: &RealRegUniverse, opts: Options, ) -> Result<RegAllocResult<F>, RegAllocError>462 pub fn allocate_registers_with_opts<F: Function>(
463     func: &mut F,
464     rreg_universe: &RealRegUniverse,
465     opts: Options,
466 ) -> Result<RegAllocResult<F>, RegAllocError> {
467     info!("");
468     info!("================ regalloc.rs: BEGIN function ================");
469     if log_enabled!(Level::Info) {
470         info!("with options: {:?}", opts);
471         let strs = rreg_universe.show();
472         info!("using RealRegUniverse:");
473         for s in strs {
474             info!("  {}", s);
475         }
476     }
477     let run_checker = opts.run_checker;
478     let res = match &opts.algorithm {
479         Algorithm::Backtracking(opts) => {
480             bt_main::alloc_main(func, rreg_universe, run_checker, opts)
481         }
482         Algorithm::LinearScan(opts) => linear_scan::run(func, rreg_universe, run_checker, opts),
483     };
484     info!("================ regalloc.rs: END function ================");
485     res
486 }
487 
488 /// Allocate registers for a function's code, given a universe of real registers that we are
489 /// allowed to use.
490 ///
491 /// The control flow graph must not contain any critical edges, that is, any edge coming from a
492 /// block with multiple successors must not flow into a block with multiple predecessors. The
493 /// embedder must have split critical edges before handing over the function to this function.
494 /// Otherwise, an error will be returned.
495 ///
496 /// Allocate may succeed, returning a `RegAllocResult` with the new instruction sequence, or it may
497 /// fail, returning an error.
498 ///
499 /// This is a convenient function that uses standard options for the allocator, according to the
500 /// selected algorithm.
501 #[inline(never)]
allocate_registers<F: Function>( func: &mut F, rreg_universe: &RealRegUniverse, algorithm: AlgorithmWithDefaults, ) -> Result<RegAllocResult<F>, RegAllocError>502 pub fn allocate_registers<F: Function>(
503     func: &mut F,
504     rreg_universe: &RealRegUniverse,
505     algorithm: AlgorithmWithDefaults,
506 ) -> Result<RegAllocResult<F>, RegAllocError> {
507     let algorithm = match algorithm {
508         AlgorithmWithDefaults::Backtracking => Algorithm::Backtracking(Default::default()),
509         AlgorithmWithDefaults::LinearScan => Algorithm::LinearScan(Default::default()),
510     };
511     let opts = Options {
512         algorithm,
513         ..Default::default()
514     };
515     allocate_registers_with_opts(func, rreg_universe, opts)
516 }
517