1 #![allow(non_snake_case)]
2 #![allow(non_camel_case_types)]
3
4 //! Core implementation of the backtracking allocator.
5
6 use log::{debug, info, log_enabled, Level};
7 use smallvec::SmallVec;
8 use std::default;
9 use std::fmt;
10
11 use crate::analysis_data_flow::{add_raw_reg_vecs_for_insn, does_inst_use_def_or_mod_reg};
12 use crate::analysis_main::{run_analysis, AnalysisInfo};
13 use crate::avl_tree::{AVLTree, AVL_NULL};
14 use crate::bt_coalescing_analysis::{do_coalescing_analysis, Hint};
15 use crate::bt_commitment_map::{CommitmentMap, RangeFragAndVLRIx};
16 use crate::bt_spillslot_allocator::SpillSlotAllocator;
17 use crate::bt_vlr_priority_queue::VirtualRangePrioQ;
18 use crate::data_structures::{
19 BlockIx, InstIx, InstPoint, Point, RangeFrag, RangeFragIx, RealRange, RealReg, RealRegUniverse,
20 Reg, RegVecBounds, RegVecs, Set, SortedRangeFrags, SpillCost, SpillSlot, TypedIxVec,
21 VirtualRange, VirtualRangeIx, VirtualReg, Writable,
22 };
23 use crate::inst_stream::{edit_inst_stream, InstToInsert, InstToInsertAndPoint};
24 use crate::sparse_set::SparseSetU;
25 use crate::union_find::UnionFindEquivClasses;
26 use crate::{Function, RegAllocError, RegAllocResult};
27
28 #[derive(Clone)]
29 pub struct BacktrackingOptions {
30 /// Should the register allocator generate block annotations?
31 pub request_block_annotations: bool,
32 }
33
34 impl default::Default for BacktrackingOptions {
default() -> Self35 fn default() -> Self {
36 Self {
37 request_block_annotations: false,
38 }
39 }
40 }
41
42 impl fmt::Debug for BacktrackingOptions {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result43 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
44 write!(
45 fmt,
46 "backtracking (block annotations: {})",
47 self.request_block_annotations
48 )
49 }
50 }
51
52 //=============================================================================
53 // The per-real-register state
54 //
55 // Relevant methods are expected to be parameterised by the same VirtualRange
56 // env as used in calls to `VirtualRangePrioQ`.
57
58 struct PerRealReg {
59 // The current committed fragments for this RealReg.
60 committed: CommitmentMap,
61
62 // The set of VirtualRanges which have been assigned to this RealReg. The
63 // union of their frags will be equal to `committed` only if this RealReg
64 // has no RealRanges. If this RealReg does have RealRanges the
65 // aforementioned union will be exactly the subset of `committed` not used
66 // by the RealRanges.
67 vlrixs_assigned: Set<VirtualRangeIx>,
68 }
69 impl PerRealReg {
new() -> Self70 fn new() -> Self {
71 Self {
72 committed: CommitmentMap::new(),
73 vlrixs_assigned: Set::<VirtualRangeIx>::empty(),
74 }
75 }
76
77 #[inline(never)]
add_RealRange(&mut self, to_add: &RealRange, frag_env: &TypedIxVec<RangeFragIx, RangeFrag>)78 fn add_RealRange(&mut self, to_add: &RealRange, frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) {
79 // Commit this register to `to_add`, irrevocably. Don't add it to
80 // `vlrixs_assigned` since we will never want to later evict the
81 // assignment.
82 self.committed
83 .add_indirect(&to_add.sorted_frags, None, frag_env);
84 }
85
86 #[inline(never)]
add_VirtualRange( &mut self, to_add_vlrix: VirtualRangeIx, vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>, )87 fn add_VirtualRange(
88 &mut self,
89 to_add_vlrix: VirtualRangeIx,
90 vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
91 ) {
92 let to_add_vlr = &vlr_env[to_add_vlrix];
93 self.committed
94 .add(&to_add_vlr.sorted_frags, Some(to_add_vlrix));
95 assert!(!self.vlrixs_assigned.contains(to_add_vlrix));
96 self.vlrixs_assigned.insert(to_add_vlrix);
97 }
98
99 #[inline(never)]
del_VirtualRange( &mut self, to_del_vlrix: VirtualRangeIx, vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>, )100 fn del_VirtualRange(
101 &mut self,
102 to_del_vlrix: VirtualRangeIx,
103 vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
104 ) {
105 // Remove it from `vlrixs_assigned`
106 if self.vlrixs_assigned.contains(to_del_vlrix) {
107 self.vlrixs_assigned.delete(to_del_vlrix);
108 } else {
109 panic!("PerRealReg: del_VirtualRange on VR not in vlrixs_assigned");
110 }
111 // Remove it from `committed`
112 let to_del_vlr = &vlr_env[to_del_vlrix];
113 self.committed.del(&to_del_vlr.sorted_frags);
114 }
115 }
116
117 // HELPER FUNCTION
118 // For a given `RangeFrag`, traverse the commitment tree rooted at `root`,
119 // adding to `running_set` the set of VLRIxs that the frag intersects, and
120 // adding their spill costs to `running_cost`. Return false if, for one of
121 // the 4 reasons documented below, the traversal has been abandoned, and true
122 // if the search completed successfully.
search_commitment_tree<IsAllowedToEvict>( running_set: &mut SparseSetU<[VirtualRangeIx; 4]>, running_cost: &mut SpillCost, tree: &AVLTree<RangeFragAndVLRIx>, pair_frag: &RangeFrag, spill_cost_budget: &SpillCost, allowed_to_evict: &IsAllowedToEvict, vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>, ) -> bool where IsAllowedToEvict: Fn(VirtualRangeIx) -> bool,123 fn search_commitment_tree<IsAllowedToEvict>(
124 // The running state, threaded through the tree traversal. These
125 // accumulate ranges and costs as we traverse the tree. These are mutable
126 // because our caller (`find_evict_set`) will want to try and allocate
127 // multiple `RangeFrag`s in this tree, not just a single one, and so it
128 // needs a way to accumulate the total evict-cost and evict-set for all
129 // the `RangeFrag`s it iterates over.
130 running_set: &mut SparseSetU<[VirtualRangeIx; 4]>,
131 running_cost: &mut SpillCost,
132 // The tree to search.
133 tree: &AVLTree<RangeFragAndVLRIx>,
134 // The RangeFrag we want to accommodate.
135 pair_frag: &RangeFrag,
136 spill_cost_budget: &SpillCost,
137 allowed_to_evict: &IsAllowedToEvict,
138 vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
139 ) -> bool
140 where
141 IsAllowedToEvict: Fn(VirtualRangeIx) -> bool,
142 {
143 let mut stack = SmallVec::<[u32; 32]>::new();
144 assert!(tree.root != AVL_NULL);
145 stack.push(tree.root);
146
147 while let Some(curr) = stack.pop() {
148 let curr_node = &tree.pool[curr as usize];
149 let curr_node_item = &curr_node.item;
150 let curr_frag = &curr_node_item.frag;
151
152 // Figure out whether `pair_frag` overlaps the root of the current
153 // subtree.
154 let overlaps_curr = pair_frag.last >= curr_frag.first && pair_frag.first <= curr_frag.last;
155
156 // Let's first consider the current node. If we need it but it's not
157 // evictable, we might as well stop now.
158 if overlaps_curr {
159 // This frag has no associated VirtualRangeIx, so it is part of a
160 // RealRange, and hence not evictable.
161 if curr_node_item.mb_vlrix.is_none() {
162 return false;
163 }
164 // Maybe this one is a spill range, in which case, it can't be
165 // evicted.
166 let vlrix_to_evict = curr_node_item.mb_vlrix.unwrap();
167 let vlr_to_evict = &vlr_env[vlrix_to_evict];
168 if vlr_to_evict.spill_cost.is_infinite() {
169 return false;
170 }
171 // Check that this range alone doesn't exceed our total spill
172 // cost. NB: given the check XXX below, this isn't actually
173 // necessary; however it means that we avoid doing two
174 // SparseSet::contains operations before exiting. This saves
175 // around 0.3% instruction count for large inputs.
176 if !vlr_to_evict.spill_cost.is_less_than(spill_cost_budget) {
177 return false;
178 }
179 // Maybe our caller doesn't want us to evict this one.
180 if !allowed_to_evict(vlrix_to_evict) {
181 return false;
182 }
183 // Ok! We can evict the current node. Update the running state
184 // accordingly. Note that we may be presented with the same VLRIx
185 // to evict multiple times, so we must be careful to add the cost
186 // of it only once.
187 if !running_set.contains(vlrix_to_evict) {
188 let mut tmp_cost = *running_cost;
189 tmp_cost.add(&vlr_to_evict.spill_cost);
190 // See above XXX
191 if !tmp_cost.is_less_than(spill_cost_budget) {
192 return false;
193 }
194 *running_cost = tmp_cost;
195 running_set.insert(vlrix_to_evict);
196 }
197 }
198
199 // Now figure out if we need to visit the subtrees, and if so push the
200 // relevant nodes. Whether we visit the left or right subtree first
201 // is unimportant, at least from a correctness perspective.
202 let must_check_left = pair_frag.first < curr_frag.first;
203 if must_check_left {
204 let left_of_curr = tree.pool[curr as usize].left;
205 if left_of_curr != AVL_NULL {
206 stack.push(left_of_curr);
207 }
208 }
209
210 let must_check_right = pair_frag.last > curr_frag.last;
211 if must_check_right {
212 let right_of_curr = tree.pool[curr as usize].right;
213 if right_of_curr != AVL_NULL {
214 stack.push(right_of_curr);
215 }
216 }
217 }
218
219 // If we get here, it means that `pair_frag` can be accommodated if we
220 // evict all the frags it overlaps in `tree`.
221 //
222 // Stay sane ..
223 assert!(running_cost.is_finite());
224 assert!(running_cost.is_less_than(spill_cost_budget));
225 true
226 }
227
228 impl PerRealReg {
229 // Find the set of VirtualRangeIxs that would need to be evicted in order to
230 // allocate `would_like_to_add` to this register. Virtual ranges mentioned
231 // in `do_not_evict` must not be considered as candidates for eviction.
232 // Also returns the total associated spill cost. That spill cost cannot be
233 // infinite.
234 //
235 // This can fail (return None) for four different reasons:
236 //
237 // - `would_like_to_add` interferes with a real-register-live-range
238 // commitment, so the register would be unavailable even if we evicted
239 // *all* virtual ranges assigned to it.
240 //
241 // - `would_like_to_add` interferes with a virtual range which is a spill
242 // range (has infinite cost). We cannot evict those without risking
243 // non-termination of the overall allocation algorithm.
244 //
245 // - `would_like_to_add` interferes with a virtual range listed in
246 // `do_not_evict`. Our caller uses this mechanism when trying to do
247 // coalesing, to avoid the nonsensicality of evicting some part of a
248 // virtual live range group in order to allocate a member of the same
249 // group.
250 //
251 // - The total spill cost of the candidate set exceeds the spill cost of
252 // `would_like_to_add`. This means that spilling them would be a net loss
253 // per our cost model. Note that `would_like_to_add` may have an infinite
254 // spill cost, in which case it will "win" over all other
255 // non-infinite-cost eviction candidates. This is by design (so as to
256 // guarantee that we can always allocate spill/reload bridges).
257 #[inline(never)]
find_evict_set<IsAllowedToEvict>( &self, would_like_to_add: VirtualRangeIx, allowed_to_evict: &IsAllowedToEvict, vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>, ) -> Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)> where IsAllowedToEvict: Fn(VirtualRangeIx) -> bool,258 fn find_evict_set<IsAllowedToEvict>(
259 &self,
260 would_like_to_add: VirtualRangeIx,
261 allowed_to_evict: &IsAllowedToEvict,
262 vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
263 ) -> Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)>
264 where
265 IsAllowedToEvict: Fn(VirtualRangeIx) -> bool,
266 {
267 // Firstly, if the commitment tree is for this reg is empty, we can
268 // declare success immediately.
269 if self.committed.tree.root == AVL_NULL {
270 let evict_set = SparseSetU::<[VirtualRangeIx; 4]>::empty();
271 let evict_cost = SpillCost::zero();
272 return Some((evict_set, evict_cost));
273 }
274
275 // The tree isn't empty, so we will have to do this the hard way: iterate
276 // over all fragments in `would_like_to_add` and check them against the
277 // tree.
278
279 // Useful constants for the main loop
280 let would_like_to_add_vlr = &vlr_env[would_like_to_add];
281 let evict_cost_budget = would_like_to_add_vlr.spill_cost;
282 // Note that `evict_cost_budget` can be infinite because
283 // `would_like_to_add` might be a spill/reload range.
284
285 // The overall evict set and cost so far. These are updated as we iterate
286 // over the fragments that make up `would_like_to_add`.
287 let mut running_set = SparseSetU::<[VirtualRangeIx; 4]>::empty();
288 let mut running_cost = SpillCost::zero();
289
290 // "wlta" = would like to add
291 for wlta_frag in &would_like_to_add_vlr.sorted_frags.frags {
292 let wlta_frag_ok = search_commitment_tree(
293 &mut running_set,
294 &mut running_cost,
295 &self.committed.tree,
296 &wlta_frag,
297 &evict_cost_budget,
298 allowed_to_evict,
299 vlr_env,
300 );
301 if !wlta_frag_ok {
302 // This fragment won't fit, for one of the four reasons listed
303 // above. So give up now.
304 return None;
305 }
306 // And move on to the next fragment.
307 }
308
309 // If we got here, it means that `would_like_to_add` can be accommodated \o/
310 assert!(running_cost.is_finite());
311 assert!(running_cost.is_less_than(&evict_cost_budget));
312 Some((running_set, running_cost))
313 }
314
315 #[allow(dead_code)]
316 #[inline(never)]
show1_with_envs(&self, _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) -> String317 fn show1_with_envs(&self, _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) -> String {
318 //"in_use: ".to_string() + &self.committed.show_with_frag_env(&frag_env)
319 "(show1_with_envs:FIXME)".to_string()
320 }
321 #[allow(dead_code)]
322 #[inline(never)]
show2_with_envs(&self, _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) -> String323 fn show2_with_envs(&self, _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) -> String {
324 //"assigned: ".to_string() + &format!("{:?}", &self.vlrixs_assigned)
325 "(show2_with_envs:FIXME)".to_string()
326 }
327 }
328
329 //=============================================================================
330 // Printing the allocator's top level state
331
332 #[inline(never)]
print_RA_state( who: &str, _universe: &RealRegUniverse, prioQ: &VirtualRangePrioQ, _perRealReg: &Vec<PerRealReg>, edit_list_move: &Vec<EditListItem>, edit_list_other: &Vec<EditListItem>, vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>, _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>, )333 fn print_RA_state(
334 who: &str,
335 _universe: &RealRegUniverse,
336 // State components
337 prioQ: &VirtualRangePrioQ,
338 _perRealReg: &Vec<PerRealReg>,
339 edit_list_move: &Vec<EditListItem>,
340 edit_list_other: &Vec<EditListItem>,
341 // The context (environment)
342 vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
343 _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
344 ) {
345 debug!("<<<<====---- RA state at '{}' ----====", who);
346 //for ix in 0..perRealReg.len() {
347 // if !&perRealReg[ix].committed.pairs.is_empty() {
348 // debug!(
349 // "{:<5} {}",
350 // universe.regs[ix].1,
351 // &perRealReg[ix].show1_with_envs(&frag_env)
352 // );
353 // debug!(" {}", &perRealReg[ix].show2_with_envs(&frag_env));
354 // debug!("");
355 // }
356 //}
357 if !prioQ.is_empty() {
358 for s in prioQ.show_with_envs(vlr_env) {
359 debug!("{}", s);
360 }
361 }
362 for eli in edit_list_move {
363 debug!("ELI MOVE: {:?}", eli);
364 }
365 for eli in edit_list_other {
366 debug!("ELI other: {:?}", eli);
367 }
368 debug!(">>>>");
369 }
370
371 //=============================================================================
372 // Allocator top level
373
374 /* (const) For each virtual live range
375 - its sorted RangeFrags
376 - its total size
377 - its spill cost
378 - (eventually) its assigned real register
379 New VirtualRanges are created as we go, but existing ones are unchanged,
380 apart from being tagged with its assigned real register.
381
382 (mut) For each real register
383 - the sorted RangeFrags assigned to it (todo: rm the M)
384 - the virtual LR indices assigned to it. This is so we can eject
385 a VirtualRange from the commitment, as needed
386
387 (mut) the set of VirtualRanges not yet allocated, prioritised by total size
388
389 (mut) the edit list that is produced
390 */
391 /*
392 fn show_commit_tab(commit_tab: &Vec::<SortedRangeFragIxs>,
393 who: &str,
394 context: &TypedIxVec::<RangeFragIx, RangeFrag>) -> String {
395 let mut res = "Commit Table at '".to_string()
396 + &who.to_string() + &"'\n".to_string();
397 let mut rregNo = 0;
398 for smf in commit_tab.iter() {
399 res += &" ".to_string();
400 res += &mkRealReg(rregNo).show();
401 res += &" ".to_string();
402 res += &smf.show_with_fenv(&context);
403 res += &"\n".to_string();
404 rregNo += 1;
405 }
406 res
407 }
408 */
409
410 // VirtualRanges created by spilling all pertain to a single InstIx. But
411 // within that InstIx, there are three kinds of "bridges":
412 #[derive(Clone, Copy, PartialEq)]
413 pub(crate) enum BridgeKind {
414 RtoU, // A bridge for a USE. This connects the reload to the use.
415 RtoS, // a bridge for a MOD. This connects the reload, the use/def
416 // and the spill, since the value must first be reloade, then
417 // operated on, and finally re-spilled.
418 DtoS, // A bridge for a DEF. This connects the def to the spill.
419 }
420
421 impl fmt::Debug for BridgeKind {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result422 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
423 match self {
424 BridgeKind::RtoU => write!(fmt, "R->U"),
425 BridgeKind::RtoS => write!(fmt, "R->S"),
426 BridgeKind::DtoS => write!(fmt, "D->S"),
427 }
428 }
429 }
430
431 #[derive(Clone, Copy)]
432 struct EditListItem {
433 // This holds enough information to create a spill or reload instruction,
434 // or both, and also specifies where in the instruction stream it/they
435 // should be added. Note that if the edit list as a whole specifies
436 // multiple items for the same location, then it is assumed that the order
437 // in which they execute isn't important.
438 //
439 // Some of the relevant info can be found via the VirtualRangeIx link:
440 // (1) the real reg involved
441 // (2) the place where the insn should go, since the VirtualRange should
442 // only have one RangeFrag, and we can deduce the correct location
443 // from that.
444 // Despite (2) we also carry here the InstIx of the affected instruction
445 // (there should be only one) since computing it via (2) is expensive.
446 // This however gives a redundancy in representation against (2). Beware!
447 slot: SpillSlot,
448 vlrix: VirtualRangeIx,
449 kind: BridgeKind,
450 iix: InstIx,
451 }
452
453 impl fmt::Debug for EditListItem {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result454 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
455 write!(
456 fmt,
457 "(ELI: at {:?} for {:?} add {:?}, slot={:?})",
458 self.iix, self.vlrix, self.kind, self.slot
459 )
460 }
461 }
462
463 // Allocator top level. This function returns a result struct that contains
464 // the final sequence of instructions, possibly with fills/spills/moves
465 // spliced in and redundant moves elided, and with all virtual registers
466 // replaced with real registers. Allocation can fail if there are insufficient
467 // registers to even generate spill/reload code, or if the function appears to
468 // have any undefined VirtualReg/RealReg uses.
469
470 #[inline(never)]
alloc_main<F: Function>( func: &mut F, reg_universe: &RealRegUniverse, use_checker: bool, opts: &BacktrackingOptions, ) -> Result<RegAllocResult<F>, RegAllocError>471 pub fn alloc_main<F: Function>(
472 func: &mut F,
473 reg_universe: &RealRegUniverse,
474 use_checker: bool,
475 opts: &BacktrackingOptions,
476 ) -> Result<RegAllocResult<F>, RegAllocError> {
477 // -------- Perform initial liveness analysis --------
478 // Note that the analysis phase can fail; hence we propagate any error.
479 let AnalysisInfo {
480 reg_vecs_and_bounds,
481 real_ranges: rlr_env,
482 virtual_ranges: mut vlr_env,
483 range_frags: frag_env,
484 range_metrics: frag_metrics_env,
485 estimated_frequencies: est_freqs,
486 inst_to_block_map,
487 ..
488 } = run_analysis(func, reg_universe).map_err(|err| RegAllocError::Analysis(err))?;
489
490 assert!(reg_vecs_and_bounds.is_sanitized());
491 assert!(frag_env.len() == frag_metrics_env.len());
492
493 // Also perform analysis that finds all coalesing opportunities.
494 let coalescing_info = do_coalescing_analysis(
495 func,
496 ®_vecs_and_bounds,
497 &rlr_env,
498 &mut vlr_env,
499 &frag_env,
500 &est_freqs,
501 ®_universe,
502 );
503 let mut hints: TypedIxVec<VirtualRangeIx, SmallVec<[Hint; 8]>> = coalescing_info.0;
504 let vlrEquivClasses: UnionFindEquivClasses<VirtualRangeIx> = coalescing_info.1;
505 let is_vv_boundary_move: TypedIxVec<InstIx, bool> = coalescing_info.2;
506 let vreg_to_vlrs_map: Vec</*vreg index,*/ SmallVec<[VirtualRangeIx; 3]>> = coalescing_info.3;
507 assert!(hints.len() == vlr_env.len());
508
509 // -------- Alloc main --------
510
511 // Create initial state
512 info!("alloc_main: begin");
513 info!(
514 "alloc_main: in: {} insns in {} blocks",
515 func.insns().len(),
516 func.blocks().len()
517 );
518 let num_vlrs_initial = vlr_env.len();
519 info!(
520 "alloc_main: in: {} VLRs, {} RLRs",
521 num_vlrs_initial,
522 rlr_env.len()
523 );
524
525 // This is fully populated by the ::new call.
526 let mut prioQ = VirtualRangePrioQ::new(&vlr_env);
527
528 // Whereas this is empty. We have to populate it "by hand", by
529 // effectively cloning the allocatable part (prefix) of the universe.
530 let mut per_real_reg = Vec::<PerRealReg>::new();
531 for _ in 0..reg_universe.allocable {
532 // Doing this instead of simply .resize avoids needing Clone for
533 // PerRealReg
534 per_real_reg.push(PerRealReg::new());
535 }
536 for rlr in rlr_env.iter() {
537 let rregIndex = rlr.rreg.get_index();
538 // Ignore RealRanges for RealRegs that are not part of the allocatable
539 // set. As far as the allocator is concerned, such RealRegs simply
540 // don't exist.
541 if rregIndex >= reg_universe.allocable {
542 continue;
543 }
544 per_real_reg[rregIndex].add_RealRange(&rlr, &frag_env);
545 }
546
547 let mut edit_list_move = Vec::<EditListItem>::new();
548 let mut edit_list_other = Vec::<EditListItem>::new();
549 if log_enabled!(Level::Debug) {
550 debug!("");
551 print_RA_state(
552 "Initial",
553 ®_universe,
554 &prioQ,
555 &per_real_reg,
556 &edit_list_move,
557 &edit_list_other,
558 &vlr_env,
559 &frag_env,
560 );
561 }
562
563 // This is also part of the running state. `vlr_slot_env` tells us the
564 // assigned spill slot for each VirtualRange, if any.
565 // `spill_slot_allocator` decides on the assignments and writes them into
566 // `vlr_slot_env`.
567 let mut vlr_slot_env = TypedIxVec::<VirtualRangeIx, Option<SpillSlot>>::new();
568 vlr_slot_env.resize(num_vlrs_initial, None);
569 let mut spill_slot_allocator = SpillSlotAllocator::new();
570
571 // Main allocation loop. Each time round, pull out the longest
572 // unallocated VirtualRange, and do one of three things:
573 //
574 // * allocate it to a RealReg, possibly by ejecting some existing
575 // allocation, but only one with a lower spill cost than this one, or
576 //
577 // * spill it. This causes the VirtualRange to disappear. It is replaced
578 // by a set of very short VirtualRanges to carry the spill and reload
579 // values. Or,
580 //
581 // * split it. This causes it to disappear but be replaced by two
582 // VirtualRanges which together constitute the original.
583 debug!("");
584 debug!("-- MAIN ALLOCATION LOOP (DI means 'direct', CO means 'coalesced'):");
585
586 info!("alloc_main: main allocation loop: begin");
587
588 // ======== BEGIN Main allocation loop ========
589 let mut num_vlrs_processed = 0; // stats only
590 let mut num_vlrs_spilled = 0; // stats only
591 let mut num_vlrs_evicted = 0; // stats only
592
593 'main_allocation_loop: loop {
594 debug!("-- still TODO {}", prioQ.len());
595 if false {
596 if log_enabled!(Level::Debug) {
597 debug!("");
598 print_RA_state(
599 "Loop Top",
600 ®_universe,
601 &prioQ,
602 &per_real_reg,
603 &edit_list_move,
604 &edit_list_other,
605 &vlr_env,
606 &frag_env,
607 );
608 debug!("");
609 }
610 }
611
612 let mb_curr_vlrix = prioQ.get_longest_VirtualRange();
613 if mb_curr_vlrix.is_none() {
614 break 'main_allocation_loop;
615 }
616
617 num_vlrs_processed += 1;
618 let curr_vlrix = mb_curr_vlrix.unwrap();
619 let curr_vlr = &vlr_env[curr_vlrix];
620
621 debug!("-- considering {:?}: {:?}", curr_vlrix, curr_vlr);
622
623 assert!(curr_vlr.vreg.to_reg().is_virtual());
624 assert!(curr_vlr.rreg.is_none());
625 let curr_vlr_regclass = curr_vlr.vreg.get_class();
626 let curr_vlr_rc = curr_vlr_regclass.rc_to_usize();
627
628 // ====== BEGIN Try to do coalescing ======
629 //
630 // First, look through the hints for `curr_vlr`, collecting up candidate
631 // real regs, in decreasing order of preference, in `hinted_regs`. Note
632 // that we don't have to consider the weights here, because the coalescing
633 // analysis phase has already sorted the hints for the VLR so as to
634 // present the most favoured (weighty) first, so we merely need to retain
635 // that ordering when copying into `hinted_regs`.
636 assert!(hints.len() == vlr_env.len());
637 let mut hinted_regs = SmallVec::<[RealReg; 8]>::new();
638
639 // === BEGIN collect all hints for `curr_vlr` ===
640 // `hints` has one entry per VLR, but only for VLRs which existed
641 // initially (viz, not for any created by spilling/splitting/whatever).
642 // Similarly, `vlrEquivClasses` can only map VLRs that existed initially,
643 // and will panic otherwise. Hence the following check:
644 if curr_vlrix.get() < hints.len() {
645 for hint in &hints[curr_vlrix] {
646 // BEGIN for each hint
647 let mb_cand = match hint {
648 Hint::SameAs(other_vlrix, _weight) => {
649 // It wants the same reg as some other VLR, but we can only honour
650 // that if the other VLR actually *has* a reg at this point. Its
651 // `rreg` field will tell us exactly that.
652 vlr_env[*other_vlrix].rreg
653 }
654 Hint::Exactly(rreg, _weight) => Some(*rreg),
655 };
656 // So now `mb_cand` might have a preferred real reg. If so, add it to
657 // the list of cands. De-dup as we go, since that is way cheaper than
658 // effectively doing the same via repeated lookups in the
659 // CommitmentMaps.
660 if let Some(rreg) = mb_cand {
661 if !hinted_regs.iter().any(|r| *r == rreg) {
662 hinted_regs.push(rreg);
663 }
664 }
665 // END for each hint
666 }
667
668 // At this point, we have in `hinted_regs`, the hint candidates that
669 // arise from copies between `curr_vlr` and its immediate neighbouring
670 // VLRs or RLRs, in order of declining preference. And that is a good
671 // start.
672 //
673 // However, it may be the case that there is some other VLR which
674 // is in the same equivalence class as `curr_vlr`, but is not a
675 // direct neighbour, and which has already been assigned a
676 // register. We really ought to take those into account too, as
677 // the least-preferred candidates. Hence we need to iterate over
678 // the equivalence class and "round up the secondary candidates."
679 //
680 // Note that the equivalence class might contain VirtualRanges
681 // that are mutually overlapping. That is handled correctly,
682 // since we always consult the relevant CommitmentMaps (in the
683 // PerRealRegs) to detect interference. To more fully understand
684 // this, see the big block comment at the top of
685 // bt_coalescing_analysis.rs.
686 let n_primary_cands = hinted_regs.len();
687
688 // Work the equivalence class set for `curr_vlrix` to pick up any
689 // rreg hints. Equivalence class info exists only for "initial" VLRs.
690 if curr_vlrix.get() < num_vlrs_initial {
691 // `curr_vlrix` is an "initial" VLR.
692 for vlrix in vlrEquivClasses.equiv_class_elems_iter(curr_vlrix) {
693 if vlrix != curr_vlrix {
694 if let Some(rreg) = vlr_env[vlrix].rreg {
695 // Add `rreg` as a cand, if we don't already have it.
696 if !hinted_regs.iter().any(|r| *r == rreg) {
697 hinted_regs.push(rreg);
698 }
699 }
700 }
701 }
702
703 // Sort the secondary cands, so as to try and impose more consistency
704 // across the group. I don't know if this is worthwhile, but it seems
705 // sensible.
706 hinted_regs[n_primary_cands..].sort_by(|rreg1, rreg2| {
707 rreg1.get_index().partial_cmp(&rreg2.get_index()).unwrap()
708 });
709 }
710
711 if log_enabled!(Level::Debug) {
712 if !hinted_regs.is_empty() {
713 let mut candStr = "pri {".to_string();
714 for (rreg, n) in hinted_regs.iter().zip(0..) {
715 if n == n_primary_cands {
716 candStr = candStr + &" } sec {".to_string();
717 }
718 candStr =
719 candStr + &" ".to_string() + ®_universe.regs[rreg.get_index()].1;
720 }
721 candStr = candStr + &" }";
722 debug!("-- CO candidates {}", candStr);
723 }
724 }
725 }
726 // === END collect all hints for `curr_vlr` ===
727
728 // === BEGIN try to use the hints for `curr_vlr` ===
729 // Now work through the list of preferences, to see if we can honour any
730 // of them.
731 for rreg in &hinted_regs {
732 let rregNo = rreg.get_index();
733
734 // Find the set of ranges which we'd have to evict in order to honour
735 // this hint. In the best case the set will be empty. In the worst
736 // case, we will get None either because (1) it would require evicting a
737 // spill range, which is disallowed so as to guarantee termination of
738 // the algorithm, or (2) because it would require evicting a real reg
739 // live range, which we can't do.
740 //
741 // We take care not to evict any range which is in the same equivalence
742 // class as `curr_vlr` since those are (by definition) connected to
743 // `curr_vlr` via V-V copies, and so evicting any of them would be
744 // counterproductive from the point of view of removing copies.
745
746 let mb_evict_info: Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)> =
747 per_real_reg[rregNo].find_evict_set(
748 curr_vlrix,
749 &|vlrix_to_evict| {
750 // What this means is: don't evict `vlrix_to_evict` if
751 // it is in the same equivalence class as `curr_vlrix`
752 // (the VLR which we're trying to allocate) AND we
753 // actually know the equivalence classes for both
754 // (hence the `Some`). Spill/reload ("non-original")
755 // VLRS don't have entries in `vlrEquivClasses`.
756 vlrEquivClasses.in_same_equivalence_class(vlrix_to_evict, curr_vlrix)
757 != Some(true)
758 },
759 &vlr_env,
760 );
761 if let Some((vlrixs_to_evict, total_evict_cost)) = mb_evict_info {
762 // Stay sane #1
763 assert!(curr_vlr.rreg.is_none());
764 // Stay sane #2
765 assert!(vlrixs_to_evict.is_empty() == total_evict_cost.is_zero());
766 // Can't evict if any in the set are spill ranges
767 assert!(total_evict_cost.is_finite());
768 // Ensure forward progress
769 assert!(total_evict_cost.is_less_than(&curr_vlr.spill_cost));
770 // Evict all evictees in the set
771 for vlrix_to_evict in vlrixs_to_evict.iter() {
772 // Ensure we're not evicting anything in `curr_vlrix`'s eclass.
773 // This should be guaranteed us by find_evict_set.
774 assert!(
775 vlrEquivClasses.in_same_equivalence_class(*vlrix_to_evict, curr_vlrix)
776 != Some(true)
777 );
778 // Evict ..
779 debug!(
780 "-- CO evict {:?}: {:?}",
781 *vlrix_to_evict, &vlr_env[*vlrix_to_evict]
782 );
783 per_real_reg[rregNo].del_VirtualRange(*vlrix_to_evict, &vlr_env);
784 prioQ.add_VirtualRange(&vlr_env, *vlrix_to_evict);
785 // Directly modify bits of vlr_env. This means we have to abandon
786 // the immutable borrow for curr_vlr, but that's OK -- we won't need
787 // it again (in this loop iteration).
788 debug_assert!(vlr_env[*vlrix_to_evict].rreg.is_some());
789 vlr_env[*vlrix_to_evict].rreg = None;
790 num_vlrs_evicted += 1;
791 }
792 // .. and reassign.
793 debug!("-- CO alloc to {}", reg_universe.regs[rregNo].1);
794 per_real_reg[rregNo].add_VirtualRange(curr_vlrix, &vlr_env);
795 vlr_env[curr_vlrix].rreg = Some(*rreg);
796 // We're done!
797 continue 'main_allocation_loop;
798 }
799 } /* for rreg in hinted_regs */
800 // === END try to use the hints for `curr_vlr` ===
801
802 // ====== END Try to do coalescing ======
803
804 // We get here if we failed to find a viable assignment by the process of
805 // looking at the coalescing hints.
806 //
807 // So: do almost exactly as we did in the "try for coalescing" stage
808 // above. Except, instead of trying each coalescing candidate
809 // individually, iterate over all the registers in the class, to find the
810 // one (if any) that has the lowest total evict cost. If we find one that
811 // has zero cost -- that is, we can make the assignment without evicting
812 // anything -- then stop the search at that point, since searching further
813 // is pointless.
814
815 let (first_in_rc, last_in_rc) = match ®_universe.allocable_by_class[curr_vlr_rc] {
816 &None => {
817 return Err(RegAllocError::OutOfRegisters(curr_vlr_regclass));
818 }
819 &Some(ref info) => (info.first, info.last),
820 };
821
822 let mut best_so_far: Option<(
823 /*rreg index*/ usize,
824 SparseSetU<[VirtualRangeIx; 4]>,
825 SpillCost,
826 )> = None;
827
828 'search_through_cand_rregs_loop: for rregNo in first_in_rc..last_in_rc + 1 {
829 //debug!("-- Cand {} ...",
830 // reg_universe.regs[rregNo].1);
831
832 let mb_evict_info: Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)> =
833 per_real_reg[rregNo].find_evict_set(
834 curr_vlrix,
835 // We pass a closure that ignores its arg and returns `true`.
836 // Meaning, "we are not specifying any particular
837 // can't-be-evicted VLRs in this call."
838 &|_vlrix_to_evict| true,
839 &vlr_env,
840 );
841 //
842 //match mb_evict_info {
843 // None => debug!("-- Cand {}: Unavail",
844 // reg_universe.regs[rregNo].1),
845 // Some((ref evict_set, ref evict_cost)) =>
846 // debug!("-- Cand {}: Avail, evict cost {:?}, set {:?}",
847 // reg_universe.regs[rregNo].1, evict_cost, evict_set)
848 //}
849 //
850 if let Some((cand_vlrixs_to_evict, cand_total_evict_cost)) = mb_evict_info {
851 // Stay sane ..
852 assert!(cand_vlrixs_to_evict.is_empty() == cand_total_evict_cost.is_zero());
853 // We can't evict if any in the set are spill ranges, and
854 // find_evict_set should not offer us that possibility.
855 assert!(cand_total_evict_cost.is_finite());
856 // Ensure forward progress
857 assert!(cand_total_evict_cost.is_less_than(&curr_vlr.spill_cost));
858 // Update the "best so far". First, if the evict set is empty, then
859 // the candidate is by definition better than all others, and we will
860 // quit searching just below.
861 let mut cand_is_better = cand_vlrixs_to_evict.is_empty();
862 if !cand_is_better {
863 if let Some((_, _, best_spill_cost)) = best_so_far {
864 // If we've already got a candidate, this one is better if it has
865 // a lower total spill cost.
866 if cand_total_evict_cost.is_less_than(&best_spill_cost) {
867 cand_is_better = true;
868 }
869 } else {
870 // We don't have any candidate so far, so what we just got is
871 // better (than nothing).
872 cand_is_better = true;
873 }
874 }
875 // Remember the candidate if required.
876 let cand_vlrixs_to_evict_is_empty = cand_vlrixs_to_evict.is_empty();
877 if cand_is_better {
878 best_so_far = Some((rregNo, cand_vlrixs_to_evict, cand_total_evict_cost));
879 }
880 // If we've found a no-evictions-necessary candidate, quit searching
881 // immediately. We won't find anything better.
882 if cand_vlrixs_to_evict_is_empty {
883 break 'search_through_cand_rregs_loop;
884 }
885 }
886 } // for rregNo in first_in_rc..last_in_rc + 1 {
887
888 // Examine the results of the search. Did we find any usable candidate?
889 if let Some((rregNo, vlrixs_to_evict, total_spill_cost)) = best_so_far {
890 // We are still Totally Paranoid (tm)
891 // Stay sane #1
892 debug_assert!(curr_vlr.rreg.is_none());
893 // Can't spill a spill range
894 assert!(total_spill_cost.is_finite());
895 // Ensure forward progress
896 assert!(total_spill_cost.is_less_than(&curr_vlr.spill_cost));
897 // Now the same evict-reassign section as with the coalescing logic above.
898 // Evict all evictees in the set
899 for vlrix_to_evict in vlrixs_to_evict.iter() {
900 // Evict ..
901 debug!(
902 "-- DI evict {:?}: {:?}",
903 *vlrix_to_evict, &vlr_env[*vlrix_to_evict]
904 );
905 per_real_reg[rregNo].del_VirtualRange(*vlrix_to_evict, &vlr_env);
906 prioQ.add_VirtualRange(&vlr_env, *vlrix_to_evict);
907 debug_assert!(vlr_env[*vlrix_to_evict].rreg.is_some());
908 vlr_env[*vlrix_to_evict].rreg = None;
909 num_vlrs_evicted += 1;
910 }
911 // .. and reassign.
912 debug!("-- DI alloc to {}", reg_universe.regs[rregNo].1);
913 per_real_reg[rregNo].add_VirtualRange(curr_vlrix, &vlr_env);
914 let rreg = reg_universe.regs[rregNo].0;
915 vlr_env[curr_vlrix].rreg = Some(rreg);
916 // We're done!
917 continue 'main_allocation_loop;
918 }
919
920 // Still no luck. We can't find a register to put it in, so we'll
921 // have to spill it, since splitting it isn't yet implemented.
922 debug!("-- spill");
923
924 // If the live range already pertains to a spill or restore, then
925 // it's Game Over. The constraints (availability of RealRegs vs
926 // requirement for them) are impossible to fulfill, and so we cannot
927 // generate any valid allocation for this function.
928 if curr_vlr.spill_cost.is_infinite() {
929 return Err(RegAllocError::OutOfRegisters(curr_vlr_regclass));
930 }
931
932 // Generate a new spill slot number, S
933 /* Spilling vreg V with virtual live range VirtualRange to slot S:
934 for each frag F in VirtualRange {
935 for each insn I in F.first.iix .. F.last.iix {
936 if I does not mention V
937 continue
938 if I mentions V in a Read role {
939 // invar: I cannot mention V in a Mod role
940 add new VirtualRange I.reload -> I.use,
941 vreg V, spillcost Inf
942 add eli @ I.reload V SpillSlot
943 }
944 if I mentions V in a Mod role {
945 // invar: I cannot mention V in a Read or Write Role
946 add new VirtualRange I.reload -> I.spill,
947 Vreg V, spillcost Inf
948 add eli @ I.reload V SpillSlot
949 add eli @ I.spill SpillSlot V
950 }
951 if I mentions V in a Write role {
952 // invar: I cannot mention V in a Mod role
953 add new VirtualRange I.def -> I.spill,
954 vreg V, spillcost Inf
955 add eli @ I.spill V SpillSlot
956 }
957 }
958 }
959 */
960
961 // We will be spilling vreg `curr_vlr.reg` with VirtualRange `curr_vlr` to
962 // a spill slot that the spill slot allocator will choose for us, just a
963 // bit further down this function.
964
965 // This holds enough info to create reload or spill (or both)
966 // instructions around an instruction that references a VirtualReg
967 // that has been spilled.
968 struct SpillAndOrReloadInfo {
969 bix: BlockIx, // that `iix` is in
970 iix: InstIx, // this is the Inst we are spilling/reloading for
971 kind: BridgeKind, // says whether to create a spill or reload or both
972 }
973 let mut sri_vec = Vec::<SpillAndOrReloadInfo>::new();
974 let curr_vlr_vreg = curr_vlr.vreg;
975 let curr_vlr_reg = curr_vlr_vreg.to_reg();
976
977 for frag in &curr_vlr.sorted_frags.frags {
978 for iix in frag.first.iix().dotdot(frag.last.iix().plus(1)) {
979 let (iix_uses_curr_vlr_reg, iix_defs_curr_vlr_reg, iix_mods_curr_vlr_reg) =
980 does_inst_use_def_or_mod_reg(®_vecs_and_bounds, iix, curr_vlr_reg);
981 // If this insn doesn't mention the vreg we're spilling for,
982 // move on.
983 if !iix_defs_curr_vlr_reg && !iix_mods_curr_vlr_reg && !iix_uses_curr_vlr_reg {
984 continue;
985 }
986 // USES: Do we need to create a reload-to-use bridge
987 // (VirtualRange) ?
988 if iix_uses_curr_vlr_reg && frag.contains(&InstPoint::new_use(iix)) {
989 debug_assert!(!iix_mods_curr_vlr_reg);
990 // Stash enough info that we can create a new VirtualRange
991 // and a new edit list entry for the reload.
992 let bix = inst_to_block_map.map(iix);
993 let sri = SpillAndOrReloadInfo {
994 bix,
995 iix,
996 kind: BridgeKind::RtoU,
997 };
998 sri_vec.push(sri);
999 }
1000 // MODS: Do we need to create a reload-to-spill bridge? This
1001 // can only happen for instructions which modify a register.
1002 // Note this has to be a single VirtualRange, since if it were
1003 // two (one for the reload, one for the spill) they could
1004 // later end up being assigned to different RealRegs, which is
1005 // obviously nonsensical.
1006 if iix_mods_curr_vlr_reg
1007 && frag.contains(&InstPoint::new_use(iix))
1008 && frag.contains(&InstPoint::new_def(iix))
1009 {
1010 debug_assert!(!iix_uses_curr_vlr_reg);
1011 debug_assert!(!iix_defs_curr_vlr_reg);
1012 let bix = inst_to_block_map.map(iix);
1013 let sri = SpillAndOrReloadInfo {
1014 bix,
1015 iix,
1016 kind: BridgeKind::RtoS,
1017 };
1018 sri_vec.push(sri);
1019 }
1020 // DEFS: Do we need to create a def-to-spill bridge?
1021 if iix_defs_curr_vlr_reg && frag.contains(&InstPoint::new_def(iix)) {
1022 debug_assert!(!iix_mods_curr_vlr_reg);
1023 let bix = inst_to_block_map.map(iix);
1024 let sri = SpillAndOrReloadInfo {
1025 bix,
1026 iix,
1027 kind: BridgeKind::DtoS,
1028 };
1029 sri_vec.push(sri);
1030 }
1031 }
1032 }
1033
1034 // Now that we no longer need to access `frag_env` or `vlr_env` for
1035 // the remainder of this iteration of the main allocation loop, we can
1036 // actually generate the required spill/reload artefacts.
1037
1038 // First off, poke the spill slot allocator to get an intelligent choice
1039 // of slot. Note that this will fail for "non-initial" VirtualRanges; but
1040 // the only non-initial ones will have been created by spilling anyway,
1041 // any we definitely shouldn't be trying to spill them again. Hence we
1042 // can assert.
1043 assert!(vlr_slot_env.len() == num_vlrs_initial);
1044 assert!(curr_vlrix < VirtualRangeIx::new(num_vlrs_initial));
1045 if vlr_slot_env[curr_vlrix].is_none() {
1046 // It hasn't been decided yet. Cause it to be so by asking for an
1047 // allocation for the entire eclass that `curr_vlrix` belongs to.
1048 spill_slot_allocator.alloc_spill_slots(
1049 &mut vlr_slot_env,
1050 func,
1051 &vlr_env,
1052 &vlrEquivClasses,
1053 curr_vlrix,
1054 );
1055 assert!(vlr_slot_env[curr_vlrix].is_some());
1056 }
1057 let spill_slot_to_use = vlr_slot_env[curr_vlrix].unwrap();
1058
1059 for sri in sri_vec {
1060 let (new_vlr_first_pt, new_vlr_last_pt) = match sri.kind {
1061 BridgeKind::RtoU => (Point::Reload, Point::Use),
1062 BridgeKind::RtoS => (Point::Reload, Point::Spill),
1063 BridgeKind::DtoS => (Point::Def, Point::Spill),
1064 };
1065 let new_vlr_frag = RangeFrag {
1066 first: InstPoint::new(sri.iix, new_vlr_first_pt),
1067 last: InstPoint::new(sri.iix, new_vlr_last_pt),
1068 };
1069 debug!("-- new RangeFrag {:?}", &new_vlr_frag);
1070 let new_vlr_sfrags = SortedRangeFrags::unit(new_vlr_frag);
1071 let new_vlr = VirtualRange {
1072 vreg: curr_vlr_vreg,
1073 rreg: None,
1074 sorted_frags: new_vlr_sfrags,
1075 size: 1,
1076 // Effectively infinite. We'll never look at this again anyway.
1077 total_cost: 0xFFFF_FFFFu32,
1078 spill_cost: SpillCost::infinite(),
1079 };
1080 let new_vlrix = VirtualRangeIx::new(vlr_env.len() as u32);
1081 debug!(
1082 "-- new VirtRange {:?} := {:?}",
1083 new_vlrix, &new_vlr
1084 );
1085 vlr_env.push(new_vlr);
1086 prioQ.add_VirtualRange(&vlr_env, new_vlrix);
1087
1088 // BEGIN (optimisation only) see if we can create any coalescing hints
1089 // for this new VLR.
1090 let mut new_vlr_hint = SmallVec::<[Hint; 8]>::new();
1091 if is_vv_boundary_move[sri.iix] {
1092 // Collect the src and dst regs for the move. It must be a
1093 // move because `is_vv_boundary_move` claims that to be true.
1094 let im = func.is_move(&func.get_insn(sri.iix));
1095 assert!(im.is_some());
1096 let (wdst_reg, src_reg): (Writable<Reg>, Reg) = im.unwrap();
1097 let dst_reg: Reg = wdst_reg.to_reg();
1098 assert!(src_reg.is_virtual() && dst_reg.is_virtual());
1099 let dst_vreg: VirtualReg = dst_reg.to_virtual_reg();
1100 let src_vreg: VirtualReg = src_reg.to_virtual_reg();
1101 let bridge_eef = est_freqs[sri.bix];
1102 match sri.kind {
1103 BridgeKind::RtoU => {
1104 // Reload-to-Use bridge. Hint that we want to be
1105 // allocated to the same reg as the destination of the
1106 // move. That means we have to find the VLR that owns
1107 // the destination vreg.
1108 for vlrix in &vreg_to_vlrs_map[dst_vreg.get_index()] {
1109 if vlr_env[*vlrix].vreg == dst_vreg {
1110 new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
1111 break;
1112 }
1113 }
1114 }
1115 BridgeKind::DtoS => {
1116 // Def-to-Spill bridge. Hint that we want to be
1117 // allocated to the same reg as the source of the
1118 // move.
1119 for vlrix in &vreg_to_vlrs_map[src_vreg.get_index()] {
1120 if vlr_env[*vlrix].vreg == src_vreg {
1121 new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
1122 break;
1123 }
1124 }
1125 }
1126 BridgeKind::RtoS => {
1127 // A Reload-to-Spill bridge. This can't happen. It
1128 // implies that the instruction modifies (both reads
1129 // and writes) one of its operands, but the client's
1130 // `is_move` function claims this instruction is a
1131 // plain "move" (reads source, writes dest). These
1132 // claims are mutually contradictory.
1133 panic!("RtoS bridge for v-v boundary move");
1134 }
1135 }
1136 }
1137 hints.push(new_vlr_hint);
1138 // END see if we can create any coalescing hints for this new VLR.
1139
1140 // Finally, create a new EditListItem. This holds enough
1141 // information that a suitable spill or reload instruction can
1142 // later be created.
1143 let new_eli = EditListItem {
1144 slot: spill_slot_to_use,
1145 vlrix: new_vlrix,
1146 kind: sri.kind,
1147 iix: sri.iix,
1148 };
1149 if is_vv_boundary_move[sri.iix] {
1150 debug!("-- new ELI MOVE {:?}", &new_eli);
1151 edit_list_move.push(new_eli);
1152 } else {
1153 debug!("-- new ELI other {:?}", &new_eli);
1154 edit_list_other.push(new_eli);
1155 }
1156 }
1157
1158 num_vlrs_spilled += 1;
1159 // And implicitly "continue 'main_allocation_loop"
1160 }
1161 // ======== END Main allocation loop ========
1162
1163 info!("alloc_main: main allocation loop: end");
1164
1165 if log_enabled!(Level::Debug) {
1166 debug!("");
1167 print_RA_state(
1168 "Final",
1169 ®_universe,
1170 &prioQ,
1171 &per_real_reg,
1172 &edit_list_move,
1173 &edit_list_other,
1174 &vlr_env,
1175 &frag_env,
1176 );
1177 }
1178
1179 // ======== BEGIN Do spill slot coalescing ========
1180
1181 debug!("");
1182 info!("alloc_main: create spills_n_reloads for MOVE insns");
1183
1184 // Sort `edit_list_move` by the insn with which each item is associated.
1185 edit_list_move.sort_unstable_by(|eli1, eli2| eli1.iix.cmp(&eli2.iix));
1186
1187 // Now go through `edit_list_move` and find pairs which constitute a
1188 // spillslot-to-the-same-spillslot move. What we have in `edit_list_move` is
1189 // heavily constrained, as follows:
1190 //
1191 // * each entry should reference an InstIx which the coalescing analysis
1192 // identified as a virtual-to-virtual copy which exists at the boundary
1193 // between two VirtualRanges. The "boundary" aspect is important; we
1194 // can't coalesce out moves in which the source vreg is not the "last use"
1195 // or for which the destination vreg is not the "first def". (The same is
1196 // true for coalescing of unspilled moves).
1197 //
1198 // * the each entry must reference a VirtualRange which has only a single
1199 // RangeFrag, and that frag must exist entirely "within" the referenced
1200 // instruction. Specifically, it may only be a R->U frag (bridge) or a
1201 // D->S frag.
1202 //
1203 // * For a referenced instruction, there may be at most two entries in this
1204 // list: one that references the R->U frag and one that references the
1205 // D->S frag. Furthermore, the two entries must carry values of the same
1206 // RegClass; if that isn't true, the client's `is_move` function is
1207 // defective.
1208 //
1209 // For any such pair identified, if both frags mention the same spill slot,
1210 // we skip generating both the reload and the spill instruction. We also
1211 // note that the instruction itself is to be deleted (converted to a
1212 // zero-len nop). In a sense we have "cancelled out" a reload/spill pair.
1213 // Entries that can't be cancelled out are handled the same way as for
1214 // entries in `edit_list_other`, by simply copying them there.
1215 //
1216 // Since `edit_list_move` is sorted by insn ix, we can scan linearly over
1217 // it, looking for adjacent pairs. We'll have to accept them in either
1218 // order though (first R->U then D->S, or the other way round). There's no
1219 // fixed ordering since there is no particular ordering in the way
1220 // VirtualRanges are allocated.
1221
1222 // As a result of spill slot coalescing, we'll need to delete the move
1223 // instructions leading to a mergable spill slot move. The insn stream
1224 // editor can't delete instructions, so instead it'll replace them with zero
1225 // len nops obtained from the client. `iixs_to_nop_out` records the insns
1226 // that this has to happen to. It is in increasing order of InstIx (because
1227 // `edit_list_sorted` is), and the indices in it refer to the original
1228 // virtual-registerised code.
1229 let mut iixs_to_nop_out = Vec::<InstIx>::new();
1230
1231 let n_edit_list_move = edit_list_move.len();
1232 let mut n_edit_list_move_processed = 0; // for assertions only
1233 let mut i_min = 0;
1234 loop {
1235 if i_min >= n_edit_list_move {
1236 break;
1237 }
1238 // Find the bounds of the current group.
1239 debug!("editlist entry (MOVE): min: {:?}", &edit_list_move[i_min]);
1240 let i_min_iix = edit_list_move[i_min].iix;
1241 let mut i_max = i_min;
1242 while i_max + 1 < n_edit_list_move && edit_list_move[i_max + 1].iix == i_min_iix {
1243 i_max += 1;
1244 debug!("editlist entry (MOVE): max: {:?}", &edit_list_move[i_max]);
1245 }
1246 // Current group is from i_min to i_max inclusive. At most 2 entries are
1247 // allowed per group.
1248 assert!(i_max - i_min <= 1);
1249 // Check for a mergeable pair.
1250 if i_max - i_min == 1 {
1251 assert!(is_vv_boundary_move[i_min_iix]);
1252 let vlrix1 = edit_list_move[i_min].vlrix;
1253 let vlrix2 = edit_list_move[i_max].vlrix;
1254 assert!(vlrix1 != vlrix2);
1255 let vlr1 = &vlr_env[vlrix1];
1256 let vlr2 = &vlr_env[vlrix2];
1257 let frags1 = &vlr1.sorted_frags;
1258 let frags2 = &vlr2.sorted_frags;
1259 assert!(frags1.frags.len() == 1);
1260 assert!(frags2.frags.len() == 1);
1261 let frag1 = &frags1.frags[0];
1262 let frag2 = &frags2.frags[0];
1263 assert!(frag1.first.iix() == i_min_iix);
1264 assert!(frag1.last.iix() == i_min_iix);
1265 assert!(frag2.first.iix() == i_min_iix);
1266 assert!(frag2.last.iix() == i_min_iix);
1267 // frag1 must be R->U and frag2 must be D->S, or vice versa
1268 match (
1269 frag1.first.pt(),
1270 frag1.last.pt(),
1271 frag2.first.pt(),
1272 frag2.last.pt(),
1273 ) {
1274 (Point::Reload, Point::Use, Point::Def, Point::Spill)
1275 | (Point::Def, Point::Spill, Point::Reload, Point::Use) => {
1276 let slot1 = edit_list_move[i_min].slot;
1277 let slot2 = edit_list_move[i_max].slot;
1278 if slot1 == slot2 {
1279 // Yay. We've found a coalescable pair. We can just ignore the
1280 // two entries and move on. Except we have to mark the insn
1281 // itself for deletion.
1282 debug!("editlist entry (MOVE): delete {:?}", i_min_iix);
1283 iixs_to_nop_out.push(i_min_iix);
1284 i_min = i_max + 1;
1285 n_edit_list_move_processed += 2;
1286 continue;
1287 }
1288 }
1289 (_, _, _, _) => {
1290 panic!("spill slot coalescing, edit_list_move: unexpected frags");
1291 }
1292 }
1293 }
1294 // If we get here for whatever reason, this group is uninteresting. Copy
1295 // in to `edit_list_other` so that it gets processed in the normal way.
1296 for i in i_min..=i_max {
1297 edit_list_other.push(edit_list_move[i]);
1298 n_edit_list_move_processed += 1;
1299 }
1300 i_min = i_max + 1;
1301 }
1302 assert!(n_edit_list_move_processed == n_edit_list_move);
1303
1304 // ======== END Do spill slot coalescing ========
1305
1306 // ======== BEGIN Create all other spills and reloads ========
1307
1308 debug!("");
1309 info!("alloc_main: create spills_n_reloads for other insns");
1310
1311 // Reload and spill instructions are missing. To generate them, go through
1312 // the "edit list", which contains info on both how to generate the
1313 // instructions, and where to insert them.
1314 let mut spills_n_reloads = Vec::<InstToInsertAndPoint>::new();
1315 let mut num_spills = 0; // stats only
1316 let mut num_reloads = 0; // stats only
1317 for eli in &edit_list_other {
1318 debug!("editlist entry (other): {:?}", eli);
1319 let vlr = &vlr_env[eli.vlrix];
1320 let vlr_sfrags = &vlr.sorted_frags;
1321 assert!(vlr_sfrags.frags.len() == 1);
1322 let vlr_frag = &vlr_sfrags.frags[0];
1323 let rreg = vlr.rreg.expect("Gen of spill/reload: reg not assigned?!");
1324 let vreg = vlr.vreg;
1325 match eli.kind {
1326 BridgeKind::RtoU => {
1327 debug_assert!(vlr_frag.first.pt().is_reload());
1328 debug_assert!(vlr_frag.last.pt().is_use());
1329 debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
1330 let insnR = InstToInsert::Reload {
1331 to_reg: Writable::from_reg(rreg),
1332 from_slot: eli.slot,
1333 for_vreg: vreg,
1334 };
1335 let whereToR = vlr_frag.first;
1336 spills_n_reloads.push(InstToInsertAndPoint::new(insnR, whereToR));
1337 num_reloads += 1;
1338 }
1339 BridgeKind::RtoS => {
1340 debug_assert!(vlr_frag.first.pt().is_reload());
1341 debug_assert!(vlr_frag.last.pt().is_spill());
1342 debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
1343 let insnR = InstToInsert::Reload {
1344 to_reg: Writable::from_reg(rreg),
1345 from_slot: eli.slot,
1346 for_vreg: vreg,
1347 };
1348 let whereToR = vlr_frag.first;
1349 let insnS = InstToInsert::Spill {
1350 to_slot: eli.slot,
1351 from_reg: rreg,
1352 for_vreg: vreg,
1353 };
1354 let whereToS = vlr_frag.last;
1355 spills_n_reloads.push(InstToInsertAndPoint::new(insnR, whereToR));
1356 spills_n_reloads.push(InstToInsertAndPoint::new(insnS, whereToS));
1357 num_reloads += 1;
1358 num_spills += 1;
1359 }
1360 BridgeKind::DtoS => {
1361 debug_assert!(vlr_frag.first.pt().is_def());
1362 debug_assert!(vlr_frag.last.pt().is_spill());
1363 debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
1364 let insnS = InstToInsert::Spill {
1365 to_slot: eli.slot,
1366 from_reg: rreg,
1367 for_vreg: vreg,
1368 };
1369 let whereToS = vlr_frag.last;
1370 spills_n_reloads.push(InstToInsertAndPoint::new(insnS, whereToS));
1371 num_spills += 1;
1372 }
1373 }
1374 }
1375
1376 // ======== END Create all other spills and reloads ========
1377
1378 // ======== BEGIN Create final instruction stream ========
1379
1380 // Gather up a vector of (RangeFrag, VirtualReg, RealReg) resulting from
1381 // the previous phase. This fundamentally is the result of the allocation
1382 // and tells us how the instruction stream must be edited. Note it does
1383 // not take account of spill or reload instructions. Dealing with those
1384 // is relatively simple and happens later.
1385
1386 info!("alloc_main: create frag_map");
1387
1388 let mut frag_map = Vec::<(RangeFrag, VirtualReg, RealReg)>::new();
1389 // For each real register under our control ..
1390 for i in 0..reg_universe.allocable {
1391 let rreg = reg_universe.regs[i].0;
1392 // .. look at all the VirtualRanges assigned to it. And for each such
1393 // VirtualRange ..
1394 for vlrix_assigned in per_real_reg[i].vlrixs_assigned.iter() {
1395 let VirtualRange {
1396 vreg, sorted_frags, ..
1397 } = &vlr_env[*vlrix_assigned];
1398 // All the RangeFrags in `vlr_assigned` require `vlr_assigned.reg`
1399 // to be mapped to the real reg `i`
1400 // .. collect up all its constituent RangeFrags.
1401 for frag in &sorted_frags.frags {
1402 frag_map.push((frag.clone(), *vreg, rreg));
1403 }
1404 }
1405 }
1406
1407 info!("alloc_main: edit_inst_stream");
1408
1409 let final_insns_and_targetmap__or_err = edit_inst_stream(
1410 func,
1411 spills_n_reloads,
1412 &iixs_to_nop_out,
1413 frag_map,
1414 ®_universe,
1415 use_checker,
1416 );
1417
1418 // ======== END Create final instruction stream ========
1419
1420 // ======== BEGIN Create the RegAllocResult ========
1421
1422 match final_insns_and_targetmap__or_err {
1423 Ok((ref final_insns, ..)) => {
1424 info!(
1425 "alloc_main: out: VLRs: {} initially, {} processed",
1426 num_vlrs_initial, num_vlrs_processed
1427 );
1428 info!(
1429 "alloc_main: out: VLRs: {} evicted, {} spilled",
1430 num_vlrs_evicted, num_vlrs_spilled
1431 );
1432 info!(
1433 "alloc_main: out: insns: {} total, {} spills, {} reloads, {} nopzs",
1434 final_insns.len(),
1435 num_spills,
1436 num_reloads,
1437 iixs_to_nop_out.len()
1438 );
1439 info!(
1440 "alloc_main: out: spill slots: {} used",
1441 spill_slot_allocator.num_slots_in_use()
1442 );
1443 }
1444 Err(_) => {
1445 info!("alloc_main: allocation failed!");
1446 }
1447 }
1448
1449 let (final_insns, target_map, orig_insn_map) = match final_insns_and_targetmap__or_err {
1450 Err(e) => {
1451 info!("alloc_main: fail");
1452 return Err(e);
1453 }
1454 Ok(pair) => {
1455 info!("alloc_main: creating RegAllocResult");
1456 pair
1457 }
1458 };
1459
1460 // Compute clobbered registers with one final, quick pass.
1461 //
1462 // FIXME: derive this information directly from the allocation data
1463 // structures used above.
1464 //
1465 // NB at this point, the `san_reg_uses` that was computed in the analysis
1466 // phase is no longer valid, because we've added and removed instructions to
1467 // the function relative to the one that `san_reg_uses` was computed from,
1468 // so we have to re-visit all insns with `add_raw_reg_vecs_for_insn`.
1469 // That's inefficient, but we don't care .. this should only be a temporary
1470 // fix.
1471
1472 let mut clobbered_registers: Set<RealReg> = Set::empty();
1473
1474 // We'll dump all the reg uses in here. We don't care the bounds, so just
1475 // pass a dummy one in the loop.
1476 let mut reg_vecs = RegVecs::new(/*sanitized=*/ false);
1477 let mut dummy_bounds = RegVecBounds::new();
1478 for insn in &final_insns {
1479 add_raw_reg_vecs_for_insn::<F>(insn, &mut reg_vecs, &mut dummy_bounds);
1480 }
1481 for reg in reg_vecs.defs.iter().chain(reg_vecs.mods.iter()) {
1482 assert!(reg.is_real());
1483 clobbered_registers.insert(reg.to_real_reg());
1484 }
1485
1486 // And now remove from the set, all those not available to the allocator.
1487 // But not removing the reserved regs, since we might have modified those.
1488 clobbered_registers.filter_map(|®| {
1489 if reg.get_index() >= reg_universe.allocable {
1490 None
1491 } else {
1492 Some(reg)
1493 }
1494 });
1495
1496 assert!(est_freqs.len() as usize == func.blocks().len());
1497 let mut block_annotations = None;
1498 if opts.request_block_annotations {
1499 let mut anns = TypedIxVec::<BlockIx, Vec<String>>::new();
1500 for (estFreq, i) in est_freqs.iter().zip(0..) {
1501 let bix = BlockIx::new(i);
1502 let ef_str = format!("RA: bix {:?}, estFreq {}", bix, estFreq);
1503 anns.push(vec![ef_str]);
1504 }
1505 block_annotations = Some(anns);
1506 }
1507
1508 let ra_res = RegAllocResult {
1509 insns: final_insns,
1510 target_map,
1511 orig_insn_map,
1512 clobbered_registers,
1513 num_spill_slots: spill_slot_allocator.num_slots_in_use() as u32,
1514 block_annotations,
1515 };
1516
1517 info!("alloc_main: end");
1518
1519 // ======== END Create the RegAllocResult ========
1520
1521 Ok(ra_res)
1522 }
1523