1 //! Main file / top-level module for regalloc library.
2 //!
3 //! We have tried hard to make the library's interface as simple as possible,
4 //! yet flexible enough that the allocators it implements can provide good
5 //! quality allocations in reasonable time. Nevertheless, there is still
6 //! significant semantic complexity in parts of the interface. If you intend
7 //! to use this library in your own code, you would be well advised to read
8 //! the comments in this file very carefully.
9
10 // Make the analysis module public for fuzzing.
11 #[cfg(feature = "fuzzing")]
12 pub mod analysis_main;
13 #[cfg(not(feature = "fuzzing"))]
14 mod analysis_main;
15
16 mod analysis_control_flow;
17 mod analysis_data_flow;
18 mod avl_tree;
19 mod bt_coalescing_analysis;
20 mod bt_commitment_map;
21 mod bt_main;
22 mod bt_spillslot_allocator;
23 mod bt_vlr_priority_queue;
24 mod checker;
25 mod data_structures;
26 mod inst_stream;
27 mod linear_scan;
28 mod reg_maps;
29 mod sparse_set;
30 mod union_find;
31
32 use log::{info, log_enabled, Level};
33 use std::default;
34 use std::{borrow::Cow, fmt};
35
36 // Stuff that is defined by the library
37
38 // Sets and maps of things. We can refine these later; but for now the
39 // interface needs some way to speak about them, so let's use the
40 // library-provided versions.
41
42 pub use crate::data_structures::Map;
43 pub use crate::data_structures::Set;
44
45 // Register classes
46
47 pub use crate::data_structures::RegClass;
48
49 // Registers, both real and virtual, and ways to create them
50
51 pub use crate::data_structures::Reg;
52
53 pub use crate::data_structures::RealReg;
54 pub use crate::data_structures::VirtualReg;
55
56 pub use crate::data_structures::Writable;
57
58 pub use crate::data_structures::NUM_REG_CLASSES;
59
60 // Spill slots
61
62 pub use crate::data_structures::SpillSlot;
63
64 // The "register universe". This describes the registers available to the
65 // allocator. There are very strict requirements on the structure of the
66 // universe. If you fail to observe these requirements, either the allocator
67 // itself, or the resulting code, will fail in mysterious ways, and your life
68 // will be miserable while you try to figure out what happened. There are
69 // lower level details on the definition of RealRegUniverse which you also
70 // need to take note of. The overall contract is as follows.
71 //
72 // === (1) === Basic structure ===
73 //
74 // A "register universe" is a read-only structure that contains all
75 // information about real registers on a given host. For each register class
76 // (RegClass) supported by the target, the universe must provide a vector of
77 // registers that the allocator may use.
78 //
79 // The universe may also list other registers that the incoming
80 // virtual-registerised code may use, but which are not available for use by
81 // the allocator. Indeed, the universe *must* list *all* registers that will
82 // ever be mentioned in the incoming code. Failure to do so will cause the
83 // allocator's analysis phase to return an error.
84 //
85 // === (2) === Ordering of registers within each class
86 //
87 // The ordering of available registers within these vectors does not affect
88 // the correctness of the final allocation. However, it will affect the
89 // quality of final allocation. Clients are recommended to list, for each
90 // class, the callee-saved registers first, and the caller-saved registers
91 // after that. The currently supported allocation algorithms (Backtracking
92 // and LinearScan) will try to use the first available registers in each
93 // class, that is to say, callee-saved ones first. The purpose of this is to
94 // try and minimise spilling around calls by avoiding use of caller-saved ones
95 // if possible.
96 //
97 // There is a twist here, however. The abovementioned heuristic works well
98 // for non-leaf functions (functions that contain at least one call). But for
99 // leaf functions, we would prefer to use the caller-saved registers first,
100 // since doing so has potential to minimise the number of registers that must
101 // be saved/restored in the prologue and epilogue. Presently there is no way
102 // to tell this interface that the function is a leaf function, and so the
103 // only way to get optimal code in this case is to present a universe with the
104 // registers listed in the opposite order.
105 //
106 // This is of course inconvenient for the caller, since it requires
107 // maintenance of two separate universes. In the future we will add a boolean
108 // parameter to the top level function `allocate_registers` that indicates
109 // that whether or not the function is a leaf function.
110 //
111 // === (3) === The "suggested scratch register" ===
112 //
113 // Some allocation algorithms, particularly linear-scan, may need to have a
114 // scratch register available for their own use. The register universe must
115 // nominate a scratch register in each class, specified in
116 // RealRegUniverse::allocable_by_class[..]::Some(suggested_scratch). The
117 // choice of scratch register is influenced by the architecture, the ABI, and
118 // client-side fixed-use register conventions. There rules are as follows:
119 //
120 // (1) For each class, the universe must offer a reserved register.
121 //
122 // (2) The reserved register may not have any implied-by-the architecture
123 // reads/modifies/writes for any instruction in the vcode. Unfortunately
124 // there is no easy way for this library to check that.
125 //
126 // (3) The reserved register must not have any reads or modifies by any
127 // instruction in the vcode. In other words, it must not be handed to
128 // either the `add_use` or `add_mod` function of the `RegUsageCollector`
129 // that is presented to the client's `get_regs` function. If any such
130 // mention is detected, the library will return an error.
131 //
132 // (4) The reserved reg may be mentioned as written by instructions in the
133 // vcode, though -- in other words it may be handed to `add_def`. The
134 // library will tolerate and correctly handle that. However, because no
135 // vcode instruction may read or modify the reserved register, all such
136 // writes are "dead". This in turn guarantees that the allocator can, if
137 // it wants, change the value in it at any time, without changing the
138 // behaviour of the final generated code.
139 //
140 // Currently, the LinearScan algorithm may use the reserved registers. The
141 // Backtracking algorithm will ignore the hints and treat them as "normal"
142 // allocatable registers.
143
144 pub use crate::data_structures::RealRegUniverse;
145 pub use crate::data_structures::RegClassInfo;
146
147 // A structure for collecting information about which registers each
148 // instruction uses.
149
150 pub use crate::data_structures::RegUsageCollector;
151
152 /// A trait for providing mapping results for a given instruction.
153 ///
154 /// This provides virtual to real register mappings for every mention in an instruction: use, mod
155 /// or def. The main purpose of this trait is to be used when re-writing the instruction stream
156 /// after register allocation happened; see also `Function::map_regs`.
157 pub trait RegUsageMapper: fmt::Debug {
158 /// Return the `RealReg` if mapped, or `None`, for `vreg` occuring as a use
159 /// on the current instruction.
get_use(&self, vreg: VirtualReg) -> Option<RealReg>160 fn get_use(&self, vreg: VirtualReg) -> Option<RealReg>;
161
162 /// Return the `RealReg` if mapped, or `None`, for `vreg` occuring as a def
163 /// on the current instruction.
get_def(&self, vreg: VirtualReg) -> Option<RealReg>164 fn get_def(&self, vreg: VirtualReg) -> Option<RealReg>;
165
166 /// Return the `RealReg` if mapped, or `None`, for a `vreg` occuring as a
167 /// mod on the current instruction.
get_mod(&self, vreg: VirtualReg) -> Option<RealReg>168 fn get_mod(&self, vreg: VirtualReg) -> Option<RealReg>;
169 }
170
171 // TypedIxVector, so that the interface can speak about vectors of blocks and
172 // instructions.
173
174 pub use crate::data_structures::TypedIxVec;
175 pub use crate::data_structures::{BlockIx, InstIx, Range};
176
177 /// A trait defined by the regalloc client to provide access to its
178 /// machine-instruction / CFG representation.
179 pub trait Function {
180 /// Regalloc is parameterized on F: Function and so can use the projected
181 /// type F::Inst.
182 type Inst: Clone + fmt::Debug;
183
184 // -------------
185 // CFG traversal
186 // -------------
187
188 /// Allow access to the underlying vector of instructions.
insns(&self) -> &[Self::Inst]189 fn insns(&self) -> &[Self::Inst];
190
191 /// Get all instruction indices as an iterable range.
insn_indices(&self) -> Range<InstIx>192 fn insn_indices(&self) -> Range<InstIx> {
193 Range::new(InstIx::new(0), self.insns().len())
194 }
195
196 /// Allow mutable access to the underlying vector of instructions.
insns_mut(&mut self) -> &mut [Self::Inst]197 fn insns_mut(&mut self) -> &mut [Self::Inst];
198
199 /// Get an instruction with a type-safe InstIx index.
get_insn(&self, insn: InstIx) -> &Self::Inst200 fn get_insn(&self, insn: InstIx) -> &Self::Inst;
201
202 /// Get a mutable borrow of an instruction with the given type-safe InstIx
203 /// index.
get_insn_mut(&mut self, insn: InstIx) -> &mut Self::Inst204 fn get_insn_mut(&mut self, insn: InstIx) -> &mut Self::Inst;
205
206 /// Allow iteration over basic blocks (in instruction order).
blocks(&self) -> Range<BlockIx>207 fn blocks(&self) -> Range<BlockIx>;
208
209 /// Get the index of the entry block.
entry_block(&self) -> BlockIx210 fn entry_block(&self) -> BlockIx;
211
212 /// Provide the range of instruction indices contained in each block.
block_insns(&self, block: BlockIx) -> Range<InstIx>213 fn block_insns(&self, block: BlockIx) -> Range<InstIx>;
214
215 /// Get CFG successors for a given block.
block_succs(&self, block: BlockIx) -> Cow<[BlockIx]>216 fn block_succs(&self, block: BlockIx) -> Cow<[BlockIx]>;
217
218 /// Determine whether an instruction is a return instruction.
is_ret(&self, insn: InstIx) -> bool219 fn is_ret(&self, insn: InstIx) -> bool;
220
221 // --------------------------
222 // Instruction register slots
223 // --------------------------
224
225 /// Add to `collector` the used, defined, and modified registers for an
226 /// instruction.
get_regs(insn: &Self::Inst, collector: &mut RegUsageCollector)227 fn get_regs(insn: &Self::Inst, collector: &mut RegUsageCollector);
228
229 /// Map each register slot through a virtual-to-real mapping indexed
230 /// by virtual register. The two separate maps in `maps.pre` and
231 /// `maps.post` provide the mapping to use for uses (which semantically
232 /// occur just prior to the instruction's effect) and defs (which
233 /// semantically occur just after the instruction's effect). Regs that were
234 /// "modified" can use either map; the vreg should be the same in both.
235 ///
236 /// Note that this does not take a `self`, because we want to allow the
237 /// regalloc to have a mutable borrow of an insn (which borrows the whole
238 /// Function in turn) outstanding while calling this.
map_regs<RUM: RegUsageMapper>(insn: &mut Self::Inst, maps: &RUM)239 fn map_regs<RUM: RegUsageMapper>(insn: &mut Self::Inst, maps: &RUM);
240
241 /// Allow the regalloc to query whether this is a move. Returns (dst, src).
is_move(&self, insn: &Self::Inst) -> Option<(Writable<Reg>, Reg)>242 fn is_move(&self, insn: &Self::Inst) -> Option<(Writable<Reg>, Reg)>;
243
244 /// Get an estimate of how many `VirtualReg` indices are used, if available, to allow
245 /// preallocating data structures.
get_vreg_count_estimate(&self) -> Option<usize>246 fn get_vreg_count_estimate(&self) -> Option<usize> {
247 None
248 }
249
250 // --------------
251 // Spills/reloads
252 // --------------
253
254 /// How many logical spill slots does the given regclass require? E.g., on a
255 /// 64-bit machine, spill slots may nominally be 64-bit words, but a 128-bit
256 /// vector value will require two slots. The regalloc will always align on
257 /// this size.
258 ///
259 /// This passes the associated virtual register to the client as well,
260 /// because the way in which we spill a real register may depend on the
261 /// value that we are using it for. E.g., if a machine has V128 registers
262 /// but we also use them for F32 and F64 values, we may use a different
263 /// store-slot size and smaller-operand store/load instructions for an F64
264 /// than for a true V128.
get_spillslot_size(&self, regclass: RegClass, for_vreg: VirtualReg) -> u32265 fn get_spillslot_size(&self, regclass: RegClass, for_vreg: VirtualReg) -> u32;
266
267 /// Generate a spill instruction for insertion into the instruction
268 /// sequence. The associated virtual register (whose value is being spilled)
269 /// is passed so that the client may make decisions about the instruction to
270 /// generate based on the type of value in question. Because the register
271 /// allocator will insert spill instructions at arbitrary points, the
272 /// returned instruction here must not modify the machine's condition codes.
gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, for_vreg: VirtualReg) -> Self::Inst273 fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, for_vreg: VirtualReg) -> Self::Inst;
274
275 /// Generate a reload instruction for insertion into the instruction
276 /// sequence. The associated virtual register (whose value is being loaded)
277 /// is passed as well. The returned instruction must not modify the
278 /// machine's condition codes.
gen_reload( &self, to_reg: Writable<RealReg>, from_slot: SpillSlot, for_vreg: VirtualReg, ) -> Self::Inst279 fn gen_reload(
280 &self,
281 to_reg: Writable<RealReg>,
282 from_slot: SpillSlot,
283 for_vreg: VirtualReg,
284 ) -> Self::Inst;
285
286 /// Generate a register-to-register move for insertion into the instruction
287 /// sequence. The associated virtual register is passed as well. The
288 /// returned instruction must not modify the machine's condition codes.
gen_move( &self, to_reg: Writable<RealReg>, from_reg: RealReg, for_vreg: VirtualReg, ) -> Self::Inst289 fn gen_move(
290 &self,
291 to_reg: Writable<RealReg>,
292 from_reg: RealReg,
293 for_vreg: VirtualReg,
294 ) -> Self::Inst;
295
296 /// Generate an instruction which is a no-op and has zero length.
gen_zero_len_nop(&self) -> Self::Inst297 fn gen_zero_len_nop(&self) -> Self::Inst;
298
299 /// Try to alter an existing instruction to use a value directly in a
300 /// spillslot (accessing memory directly) instead of the given register. May
301 /// be useful on ISAs that have mem/reg ops, like x86.
302 ///
303 /// Note that this is not *quite* just fusing a load with the op; if the
304 /// value is def'd or modified, it should be written back to the spill slot
305 /// as well. In other words, it is just using the spillslot as if it were a
306 /// real register, for reads and/or writes.
307 ///
308 /// FIXME JRS 2020Feb06: state precisely the constraints on condition code
309 /// changes.
maybe_direct_reload( &self, insn: &Self::Inst, reg: VirtualReg, slot: SpillSlot, ) -> Option<Self::Inst>310 fn maybe_direct_reload(
311 &self,
312 insn: &Self::Inst,
313 reg: VirtualReg,
314 slot: SpillSlot,
315 ) -> Option<Self::Inst>;
316
317 // ----------------------------------------------------------
318 // Function liveins, liveouts, and direct-mode real registers
319 // ----------------------------------------------------------
320
321 /// Return the set of registers that should be considered live at the
322 /// beginning of the function. This is semantically equivalent to an
323 /// instruction at the top of the entry block def'ing all registers in this
324 /// set.
func_liveins(&self) -> Set<RealReg>325 fn func_liveins(&self) -> Set<RealReg>;
326
327 /// Return the set of registers that should be considered live at the
328 /// end of the function (after every return instruction). This is
329 /// semantically equivalent to an instruction at each block with no successors
330 /// that uses each of these registers.
func_liveouts(&self) -> Set<RealReg>331 fn func_liveouts(&self) -> Set<RealReg>;
332 }
333
334 /// The result of register allocation. Note that allocation can fail!
335 pub struct RegAllocResult<F: Function> {
336 /// A new sequence of instructions with all register slots filled with real
337 /// registers, and spills/fills/moves possibly inserted (and unneeded moves
338 /// elided).
339 pub insns: Vec<F::Inst>,
340
341 /// Basic-block start indices for the new instruction list, indexed by the
342 /// original basic block indices. May be used by the client to, e.g., remap
343 /// branch targets appropriately.
344 pub target_map: TypedIxVec<BlockIx, InstIx>,
345
346 /// Full mapping from new instruction indices to original instruction
347 /// indices. May be needed by the client to, for example, update metadata
348 /// such as debug/source-location info as the instructions are spliced
349 /// and reordered.
350 ///
351 /// Each entry is an `InstIx`, but may be `InstIx::invalid_value()` if the
352 /// new instruction at this new index was inserted by the allocator
353 /// (i.e., if it is a load, spill or move instruction).
354 pub orig_insn_map: TypedIxVec</* new */ InstIx, /* orig */ InstIx>,
355
356 /// Which real registers were overwritten? This will contain all real regs
357 /// that appear as defs or modifies in register slots of the output
358 /// instruction list. This will only list registers that are available to
359 /// the allocator. If one of the instructions clobbers a register which
360 /// isn't available to the allocator, it won't be mentioned here.
361 pub clobbered_registers: Set<RealReg>,
362
363 /// How many spill slots were used?
364 pub num_spill_slots: u32,
365
366 /// Block annotation strings, for debugging. Requires requesting in the
367 /// call to `allocate_registers`. Creating of these annotations is
368 /// potentially expensive, so don't request them if you don't need them.
369 pub block_annotations: Option<TypedIxVec<BlockIx, Vec<String>>>,
370 }
371
372 /// A choice of register allocation algorithm to run.
373 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
374 pub enum AlgorithmWithDefaults {
375 Backtracking,
376 LinearScan,
377 }
378
379 pub use crate::analysis_main::AnalysisError;
380 pub use crate::checker::{CheckerError, CheckerErrors};
381
382 /// An error from the register allocator.
383 #[derive(Clone, Debug)]
384 pub enum RegAllocError {
385 OutOfRegisters(RegClass),
386 MissingSuggestedScratchReg(RegClass),
387 Analysis(AnalysisError),
388 RegChecker(CheckerErrors),
389 Other(String),
390 }
391
392 impl fmt::Display for RegAllocError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result393 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
394 write!(f, "{:?}", self)
395 }
396 }
397
398 pub use crate::bt_main::BacktrackingOptions;
399 pub use crate::linear_scan::LinearScanOptions;
400
401 #[derive(Clone)]
402 pub enum Algorithm {
403 LinearScan(LinearScanOptions),
404 Backtracking(BacktrackingOptions),
405 }
406
407 impl fmt::Debug for Algorithm {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result408 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
409 match self {
410 Algorithm::LinearScan(opts) => write!(fmt, "{:?}", opts),
411 Algorithm::Backtracking(opts) => write!(fmt, "{:?}", opts),
412 }
413 }
414 }
415
416 /// Tweakable options shared by all the allocators.
417 #[derive(Clone)]
418 pub struct Options {
419 /// Should the register allocator check that its results are valid? This adds runtime to the
420 /// compiler, so this is disabled by default.
421 pub run_checker: bool,
422
423 /// Which algorithm should be used for register allocation? By default, selects backtracking,
424 /// which is slower to compile but creates code of better quality.
425 pub algorithm: Algorithm,
426 }
427
428 impl default::Default for Options {
default() -> Self429 fn default() -> Self {
430 Self {
431 run_checker: false,
432 algorithm: Algorithm::Backtracking(Default::default()),
433 }
434 }
435 }
436
437 impl fmt::Debug for Options {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result438 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
439 write!(
440 f,
441 "checker: {:?}, algorithm: {:?}",
442 self.run_checker, self.algorithm
443 )
444 }
445 }
446
447 /// Allocate registers for a function's code, given a universe of real registers that we are
448 /// allowed to use.
449 ///
450 /// The control flow graph must not contain any critical edges, that is, any edge coming from a
451 /// block with multiple successors must not flow into a block with multiple predecessors. The
452 /// embedder must have split critical edges before handing over the function to this function.
453 /// Otherwise, an error will be returned.
454 ///
455 /// Allocate may succeed, returning a `RegAllocResult` with the new instruction sequence, or it may
456 /// fail, returning an error.
457 ///
458 /// Runtime options can be passed to the allocators, through the use of [Options] for options
459 /// common to all the backends. The choice of algorithm is done by passing a given [Algorithm]
460 /// instance, with options tailored for each algorithm.
461 #[inline(never)]
allocate_registers_with_opts<F: Function>( func: &mut F, rreg_universe: &RealRegUniverse, opts: Options, ) -> Result<RegAllocResult<F>, RegAllocError>462 pub fn allocate_registers_with_opts<F: Function>(
463 func: &mut F,
464 rreg_universe: &RealRegUniverse,
465 opts: Options,
466 ) -> Result<RegAllocResult<F>, RegAllocError> {
467 info!("");
468 info!("================ regalloc.rs: BEGIN function ================");
469 if log_enabled!(Level::Info) {
470 info!("with options: {:?}", opts);
471 let strs = rreg_universe.show();
472 info!("using RealRegUniverse:");
473 for s in strs {
474 info!(" {}", s);
475 }
476 }
477 let run_checker = opts.run_checker;
478 let res = match &opts.algorithm {
479 Algorithm::Backtracking(opts) => {
480 bt_main::alloc_main(func, rreg_universe, run_checker, opts)
481 }
482 Algorithm::LinearScan(opts) => linear_scan::run(func, rreg_universe, run_checker, opts),
483 };
484 info!("================ regalloc.rs: END function ================");
485 res
486 }
487
488 /// Allocate registers for a function's code, given a universe of real registers that we are
489 /// allowed to use.
490 ///
491 /// The control flow graph must not contain any critical edges, that is, any edge coming from a
492 /// block with multiple successors must not flow into a block with multiple predecessors. The
493 /// embedder must have split critical edges before handing over the function to this function.
494 /// Otherwise, an error will be returned.
495 ///
496 /// Allocate may succeed, returning a `RegAllocResult` with the new instruction sequence, or it may
497 /// fail, returning an error.
498 ///
499 /// This is a convenient function that uses standard options for the allocator, according to the
500 /// selected algorithm.
501 #[inline(never)]
allocate_registers<F: Function>( func: &mut F, rreg_universe: &RealRegUniverse, algorithm: AlgorithmWithDefaults, ) -> Result<RegAllocResult<F>, RegAllocError>502 pub fn allocate_registers<F: Function>(
503 func: &mut F,
504 rreg_universe: &RealRegUniverse,
505 algorithm: AlgorithmWithDefaults,
506 ) -> Result<RegAllocResult<F>, RegAllocError> {
507 let algorithm = match algorithm {
508 AlgorithmWithDefaults::Backtracking => Algorithm::Backtracking(Default::default()),
509 AlgorithmWithDefaults::LinearScan => Algorithm::LinearScan(Default::default()),
510 };
511 let opts = Options {
512 algorithm,
513 ..Default::default()
514 };
515 allocate_registers_with_opts(func, rreg_universe, opts)
516 }
517