#![allow(non_snake_case)]
#![allow(non_camel_case_types)]

//! Allocation of spill slots for the backtracking allocator.

use crate::avl_tree::{AVLTree, AVL_NULL};
use crate::data_structures::{
    cmp_range_frags, InstPoint, RangeFrag, SortedRangeFrags, SpillSlot, TypedIxVec, VirtualRange,
    VirtualRangeIx,
};
use crate::union_find::UnionFindEquivClasses;
use crate::Function;

//=============================================================================
// A spill slot allocator. This could be implemented more simply than it is.
// The reason for the extra complexity is to support copy-coalescing at the
// spill-slot level. That is, it tries to make it possible to allocate all
// members of a VirtualRange group to the same spill slot, so that moves
// between two spilled members of the same group can be turned into no-ops.
//
// All of the `size` metrics in this bit are in terms of "logical spill slot
// units", per the interface's description for `get_spillslot_size`.

// *** Important: to fully understand this allocator and how it interacts with
// coalescing analysis, you need to read the big block comment at the top of
// bt_coalescing_analysis.rs.

//=============================================================================
// Logical spill slots

// In the trees, we keep track of which frags are reftyped, so we can later create stackmaps by
// slicing all of the trees at some `InstPoint`. Unfortunately this requires storing 65 bits of
// data in each node -- 64 bits for the RangeFrag and 1 bit for the reftype. A TODO would be to
// steal one bit from the RangeFrag. For now though, we do the simple thing.

#[derive(Clone, PartialEq, PartialOrd)]
struct RangeFragAndRefness {
    frag: RangeFrag,
    is_ref: bool,
}
impl RangeFragAndRefness {
    fn new(frag: RangeFrag, is_ref: bool) -> Self {
        Self { frag, is_ref }
    }
}

// We keep one of these for every "logical spill slot" in use.
enum LogicalSpillSlot {
    // This slot is in use and can hold values of size `size` (only). Note that
    // `InUse` may only appear in `SpillSlotAllocator::slots` positions that
    // have indices that are 0 % `size`. Furthermore, after such an entry in
    // `SpillSlotAllocator::slots`, the next `size` - 1 entries must be
    // `Unavail`. This is a hard invariant, violation of which will cause
    // overlapping spill slots and potential chaos.
    InUse {
        size: u32,
        tree: AVLTree<RangeFragAndRefness>,
    },
    // This slot is unavailable, as described above. It's unavailable because
    // it holds some part of the values associated with the nearest lower
    // numbered entry which isn't `Unavail`, and that entry must be an `InUse`
    // entry.
    Unavail,
}
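
// As an illustration of the invariant above: a `slots` vector holding a
// size-1 slot followed by a size-4 slot must look like this, with the size-4
// entry starting at an index that is 0 % 4:
//
//   index 0:     InUse { size: 1, .. }
//   indices 1-3: Unavail   (alignment padding inserted by `add_new_slot`)
//   index 4:     InUse { size: 4, .. }
//   indices 5-7: Unavail   (blocked out by the size-4 slot at index 4)
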
impl LogicalSpillSlot {
    fn is_Unavail(&self) -> bool {
        match self {
            LogicalSpillSlot::Unavail => true,
            _ => false,
        }
    }
    fn is_InUse(&self) -> bool {
        !self.is_Unavail()
    }
    fn get_tree(&self) -> &AVLTree<RangeFragAndRefness> {
        match self {
            LogicalSpillSlot::InUse { ref tree, .. } => tree,
            LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_tree"),
        }
    }
    fn get_mut_tree(&mut self) -> &mut AVLTree<RangeFragAndRefness> {
        match self {
            LogicalSpillSlot::InUse { ref mut tree, .. } => tree,
            LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_mut_tree"),
        }
    }
    fn get_size(&self) -> u32 {
        match self {
            LogicalSpillSlot::InUse { size, .. } => *size,
            LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_size"),
        }
    }
    // If this spill slot is occupied at `pt`, return the refness of the value (VirtualRange)
    // stored in it. This is conceptually equivalent to CommitmentMap::lookup_inst_point.
    fn get_refness_at_inst_point(&self, pt: InstPoint) -> Option<bool> {
        match self {
            LogicalSpillSlot::InUse { size: 1, tree } => {
                // Search the tree to see if a reffy commitment intersects `pt`.
                let mut root = tree.root;
                while root != AVL_NULL {
                    let root_node = &tree.pool[root as usize];
                    let root_item = &root_node.item;
                    if pt < root_item.frag.first {
                        // `pt` is to the left of the `root`. So there's no
                        // overlap with `root`. Continue by inspecting the left subtree.
                        root = root_node.left;
                    } else if root_item.frag.last < pt {
                        // Ditto for the right subtree.
                        root = root_node.right;
                    } else {
                        // `pt` overlaps the `root`, so we have what we want.
                        return Some(root_item.is_ref);
                    }
                }
                None
            }
            LogicalSpillSlot::InUse { .. } | LogicalSpillSlot::Unavail => {
                // Slot isn't in use, or is in use but for values of some non-ref size.
                None
            }
        }
    }
}

// HELPER FUNCTION
// Find out whether it is possible to add `frag` to `tree`.
#[inline(always)]
fn ssal_is_add_frag_possible(tree: &AVLTree<RangeFragAndRefness>, frag: &RangeFrag) -> bool {
    // BEGIN check `frag` for any overlap against `tree`.
    let mut root = tree.root;
    while root != AVL_NULL {
        let root_node = &tree.pool[root as usize];
        let root_item = &root_node.item;
        if frag.last < root_item.frag.first {
            // `frag` is entirely to the left of the `root`. So there's no
            // overlap with root. Continue by inspecting the left subtree.
            root = root_node.left;
        } else if root_item.frag.last < frag.first {
            // Ditto for the right subtree.
            root = root_node.right;
        } else {
            // `frag` overlaps the `root`. Give up.
            return false;
        }
    }
    // END check `frag` for any overlap against `tree`.
    // `frag` doesn't overlap.
    true
}
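
// Note on the walk above: two frags A and B overlap iff neither ends before
// the other begins, that is, !(A.last < B.first || B.last < A.first). Because
// the frags already in `tree` are mutually disjoint, each comparison discards
// an entire subtree, so the check is O(log N) per frag for a tree of N frags.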

// HELPER FUNCTION
// Find out whether it is possible to add all of `frags` to `tree`. Returns
// true if possible, false if not. This routine relies on the fact that
// SortedFrags is non-overlapping. However, this is a bit subtle. We know
// that both `tree` and `frags` individually are non-overlapping, but there's
// no guarantee that elements of `frags` don't overlap `tree`. Hence we have
// to do a custom walk of `tree` to check for overlap; we can't just use
// `AVLTree::contains`.
fn ssal_is_add_possible(tree: &AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) -> bool {
    // Figure out whether all the frags will go in.
    for frag in &frags.frags {
        if !ssal_is_add_frag_possible(&tree, frag) {
            return false;
        }
        // `frag` doesn't overlap. Move on to the next one.
    }
    true
}

// HELPER FUNCTION
// Try to add all of `frags` to `tree`. Return `true` if possible, `false` if not possible. If
// `false` is returned, `tree` is unchanged (this is important). This routine relies on the
// fact that SortedFrags is non-overlapping. They are initially all marked as non-reffy. That
// may later be changed by calls to `SpillSlotAllocator::notify_spillage_of_reftyped_vlr`.
fn ssal_add_if_possible(tree: &mut AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) -> bool {
    // Check if all the frags will go in.
    if !ssal_is_add_possible(tree, frags) {
        return false;
    }
    // They will. So now insert them.
    for frag in &frags.frags {
        let inserted = tree.insert(
            RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ false),
            Some(&|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
                cmp_range_frags(&item1.frag, &item2.frag)
            }),
        );
        // This can't fail.
        assert!(inserted);
    }
    true
}

// HELPER FUNCTION
// Let `frags` be the RangeFrags for some VirtualRange that have already been allocated in
// `tree`. Mark each such RangeFrag as reffy. Note that the comparator below keys only on
// the `frag` field, so replacing a non-reffy entry with its reffy twin leaves the tree's
// ordering intact.
fn ssal_mark_frags_as_reftyped(tree: &mut AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) {
    for frag in &frags.frags {
        // Be paranoid. (1) `frag` must already exist in `tree`. (2) it must not be marked as
        // reffy.
        let del_this = RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ false);
        let add_this = RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ true);
        let replaced_ok = tree.find_and_replace(
            del_this,
            add_this,
            &|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
                cmp_range_frags(&item1.frag, &item2.frag)
            },
        );
        // This assertion effectively encompasses both (1) and (2) above.
        assert!(replaced_ok);
    }
}

//=============================================================================
// SpillSlotAllocator: public interface

pub struct SpillSlotAllocator {
    slots: Vec<LogicalSpillSlot>,
}
impl SpillSlotAllocator {
    pub fn new() -> Self {
        Self { slots: vec![] }
    }

    pub fn num_slots_in_use(&self) -> usize {
        self.slots.len()
    }

    // This adds a new, empty slot, for items of the given size, and returns
    // its index. This isn't clever, in the sense that it fails to use some
    // slots that it could use, but at least it's simple. Note, this is a
    // private method.
    fn add_new_slot(&mut self, req_size: u32) -> u32 {
        assert!(req_size == 1 || req_size == 2 || req_size == 4 || req_size == 8);
        // Satisfy alignment constraints. These entries will unfortunately be
        // wasted (never used).
        while self.slots.len() % (req_size as usize) != 0 {
            self.slots.push(LogicalSpillSlot::Unavail);
        }
        // And now the new slot. The `dflt` value is needed by `AVLTree` to initialise storage
        // slots for tree nodes, but we will never actually see those values. So it doesn't
        // matter what they are.
        let dflt = RangeFragAndRefness::new(RangeFrag::invalid_value(), false);
        let tree = AVLTree::<RangeFragAndRefness>::new(dflt);
        let res = self.slots.len() as u32;
        self.slots.push(LogicalSpillSlot::InUse {
            size: req_size,
            tree,
        });
        // And now block out the subsequent slots that `req_size` implies.
        // viz: req_size == 1 -> block out 0 more
        // viz: req_size == 2 -> block out 1 more
        // viz: req_size == 4 -> block out 3 more
        // viz: req_size == 8 -> block out 7 more
        for _ in 1..req_size {
            self.slots.push(LogicalSpillSlot::Unavail);
        }
        assert!(self.slots.len() % (req_size as usize) == 0);

        res
    }
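
    // Worked example of `add_new_slot`: if `slots.len()` is currently 6 and
    // `req_size` is 4, the alignment loop pushes `Unavail` at indices 6 and 7,
    // the `InUse` entry lands at index 8 (so 8 is returned), and indices 9
    // through 11 are blocked out, leaving `slots.len()` at 12.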

    // THE MAIN FUNCTION
    // Allocate spill slots for all the VirtualRanges in `vlrix`s eclass,
    // including `vlrix` itself. Since we are allocating spill slots for
    // complete eclasses at once, none of the members of the class should
    // currently have any allocation. This routine will try to allocate all
    // class members the same slot, but it can only guarantee to do so if the
    // class members are mutually non-overlapping. Hence it can't guarantee that
    // in general.
    pub fn alloc_spill_slots<F: Function>(
        &mut self,
        vlr_slot_env: &mut TypedIxVec<VirtualRangeIx, Option<SpillSlot>>,
        func: &F,
        vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
        vlrEquivClasses: &UnionFindEquivClasses<VirtualRangeIx>,
        vlrix: VirtualRangeIx,
    ) {
        let is_ref = vlr_env[vlrix].is_ref;
        for cand_vlrix in vlrEquivClasses.equiv_class_elems_iter(vlrix) {
            // "None of the VLRs in this equivalence class have an allocated spill slot."
            // This should be true because we allocate spill slots for all of the members of an
            // eclass at once.
            assert!(vlr_slot_env[cand_vlrix].is_none());

            // "All of the VLRs in this eclass have the same ref-ness as this VLR."
            // Why this is true is a bit subtle. The equivalence classes are computed by
            // `do_coalescing_analysis`, fundamentally by looking at all the move instructions
            // and computing the transitive closure induced by them. The ref-ness annotations
            // on each VLR are computed in `do_reftypes_analysis`, and they are also computed
            // as a transitive closure on the same move instructions. Hence the results should
            // be identical.
            //
            // With all that said, note that these equivalence classes are *not* guaranteed to
            // be internally non-overlapping. This is explained in the big block comment at the
            // top of bt_coalescing_analysis.rs.
            assert!(vlr_env[cand_vlrix].is_ref == is_ref);
        }

        // Do this in two passes. It's a bit cumbersome.
        //
        // In the first pass, find a spill slot which can take all of the
        // candidates when we try them *individually*, but don't update the tree
        // yet. We will always find such a slot, because if none of the existing
        // slots can do it, we can always start a new one.
        //
        // Now, that doesn't guarantee that all the candidates can *together*
        // be assigned to the chosen slot. That's only possible when they
        // are non-overlapping. Rather than laboriously try to determine
        // that, simply proceed with the second pass, the assignment pass, as
        // follows. For each candidate, try to allocate it to the slot chosen
        // in the first pass. If it goes in without interference, fine. If
        // not, that means it overlaps with some other member of the class --
        // in which case we must find some other slot for it. It's too bad.
        //
        // The result is: all members will get a valid spill slot. And if they
        // were all non-overlapping then we are guaranteed that they all get the
        // same slot. Which is as good as we can hope for.
        //
        // In both passes, only the highest-numbered 8 slots are checked for
        // availability. This is a heuristic hack which both reduces
        // allocation time and reduces the eventual resulting spilling:
        //
        // - It avoids lots of pointless repeated checking of low-numbered
        //   spill slots, that long ago became full(ish) and are unlikely to be
        //   able to take any new VirtualRanges
        //
        // - More subtly, it interacts with the question of whether or not
        //   each VirtualRange equivalence class is internally overlapping.
        //   When no overlaps are present, the spill slot allocator guarantees
        //   to find a slot which is free for the entire equivalence class,
        //   which is the ideal solution. When there are overlaps present, the
        //   allocator is forced to allocate at least some of the VirtualRanges
        //   in the class to different slots. By restricting the number of
        //   slots it can choose to 8 (+ extras if it needs them), we reduce the
        //   tendency for the VirtualRanges to be assigned a large number of
        //   different slots, which in turn reduces the amount of spilling in
        //   the end.

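        // As a hypothetical illustration of the two passes: suppose the eclass
        // is {A, B, C}, where A and B don't overlap each other but C overlaps
        // B. Pass 1 can still choose a slot S, because each of A, B and C fits
        // in S *individually*. In pass 2, A and B are committed to S, but C no
        // longer fits, so C ends up in some other (possibly freshly created)
        // slot. Moves between A and B then become no-ops; moves involving C
        // don't.
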
        // We need to know what regclass, and hence what slot size, we're looking
        // for. Just look at the representative; all VirtualRanges in the eclass
        // must have the same regclass. (If they don't, the client's is_move
        // function has been giving us wrong information.)
        let vlrix_vreg = vlr_env[vlrix].vreg;
        let req_size = func.get_spillslot_size(vlrix_vreg.get_class(), vlrix_vreg);
        assert!(req_size == 1 || req_size == 2 || req_size == 4 || req_size == 8);

        // Sanity check: if the VLR is reftyped, then it must need a 1-word slot
        // (anything else is nonsensical).
        if is_ref {
            assert!(req_size == 1);
        }

        // Pass 1: find a slot which can take all VirtualRanges in `vlrix`s
        // eclass when tested individually.
        //
        // Pass 1a: search existing slots
        let search_start_slotno: u32 = {
            // We will only search from `search_start_slotno` upwards. See
            // block comment above for significance of the value `8`.
            let window = 8;
            if self.slots.len() >= window {
                (self.slots.len() - window) as u32
            } else {
                0
            }
        };
        let mut mb_chosen_slotno: Option<u32> = None;
        // BEGIN search existing slots
        for cand_slot_no in search_start_slotno..self.slots.len() as u32 {
            let cand_slot = &self.slots[cand_slot_no as usize];
            if !cand_slot.is_InUse() {
                continue;
            }
            if cand_slot.get_size() != req_size {
                continue;
            }
            let tree = &cand_slot.get_tree();
            assert!(mb_chosen_slotno.is_none());

            // BEGIN see if `cand_slot` can hold all eclass members individually
            let mut all_cands_fit_individually = true;
            for cand_vlrix in vlrEquivClasses.equiv_class_elems_iter(vlrix) {
                let cand_vlr = &vlr_env[cand_vlrix];
                let this_cand_fits = ssal_is_add_possible(&tree, &cand_vlr.sorted_frags);
                if !this_cand_fits {
                    all_cands_fit_individually = false;
                    break;
                }
            }
            // END see if `cand_slot` can hold all eclass members individually
            if !all_cands_fit_individually {
                continue;
            }

            // Ok. All eclass members will fit individually in `cand_slot_no`.
            mb_chosen_slotno = Some(cand_slot_no);
            break;
        }
        // END search existing slots

        // Pass 1b. If we didn't find a usable slot, allocate a new one.
        let chosen_slotno: u32 = if mb_chosen_slotno.is_none() {
            self.add_new_slot(req_size)
        } else {
            mb_chosen_slotno.unwrap()
        };

        // Pass 2. Try to allocate each eclass member individually to the chosen
        // slot. If that fails, just allocate them anywhere.
        let mut _all_in_chosen = true;
        'pass2_per_equiv_class: for cand_vlrix in vlrEquivClasses.equiv_class_elems_iter(vlrix) {
            let cand_vlr = &vlr_env[cand_vlrix];
            let mut tree = self.slots[chosen_slotno as usize].get_mut_tree();
            let added = ssal_add_if_possible(&mut tree, &cand_vlr.sorted_frags);
            if added {
                vlr_slot_env[cand_vlrix] = Some(SpillSlot::new(chosen_slotno));
                continue 'pass2_per_equiv_class;
            }
            _all_in_chosen = false;
            // It won't fit in `chosen_slotno`, so try somewhere (anywhere) else.
            for alt_slotno in search_start_slotno..self.slots.len() as u32 {
                let alt_slot = &self.slots[alt_slotno as usize];
                if !alt_slot.is_InUse() {
                    continue;
                }
                if alt_slot.get_size() != req_size {
                    continue;
                }
                if alt_slotno == chosen_slotno {
                    // We already know this won't work.
                    continue;
                }
                let mut tree = self.slots[alt_slotno as usize].get_mut_tree();
                let added = ssal_add_if_possible(&mut tree, &cand_vlr.sorted_frags);
                if added {
                    vlr_slot_env[cand_vlrix] = Some(SpillSlot::new(alt_slotno));
                    continue 'pass2_per_equiv_class;
                }
            }
            // If we get here, it means it won't fit in any slot we currently have.
            // So allocate a new one and use that.
            let new_slotno = self.add_new_slot(req_size);
            let mut tree = self.slots[new_slotno as usize].get_mut_tree();
            let added = ssal_add_if_possible(&mut tree, &cand_vlr.sorted_frags);
            if added {
                vlr_slot_env[cand_vlrix] = Some(SpillSlot::new(new_slotno));
                continue 'pass2_per_equiv_class;
            }
            // We failed to allocate it to any empty slot! This can't happen.
            panic!("SpillSlotAllocator: alloc_spill_slots: failed?!?!");
            /*NOTREACHED*/
        } /* 'pass2_per_equiv_class */
    }

    // STACKMAP SUPPORT
    // Mark the `frags` for `slot_no` as being reftyped. They are expected to already exist in
    // the relevant tree, and not currently be marked as reftyped.
    pub fn notify_spillage_of_reftyped_vlr(
        &mut self,
        slot_no: SpillSlot,
        frags: &SortedRangeFrags,
    ) {
        let slot_ix = slot_no.get_usize();
        assert!(slot_ix < self.slots.len());
        let slot = &mut self.slots[slot_ix];
        match slot {
            LogicalSpillSlot::InUse { size, tree } if *size == 1 => {
                ssal_mark_frags_as_reftyped(tree, frags)
            }
            _ => panic!("SpillSlotAllocator::notify_spillage_of_reftyped_vlr: invalid slot"),
        }
    }

    // STACKMAP SUPPORT
    // Allocate a size-1 (word!) spill slot for `frag` and return it. The slot is marked
    // reftyped so that a later call to `get_reftyped_spillslots_at_inst_point` will return it.
    pub fn alloc_reftyped_spillslot_for_frag(&mut self, frag: RangeFrag) -> SpillSlot {
        for i in 0..self.slots.len() {
            match &mut self.slots[i] {
                LogicalSpillSlot::InUse { size: 1, tree } => {
                    if ssal_is_add_frag_possible(&tree, &frag) {
                        // We're in luck.
                        let inserted = tree.insert(
                            RangeFragAndRefness::new(frag, /*is_ref=*/ true),
                            Some(&|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
                                cmp_range_frags(&item1.frag, &item2.frag)
                            }),
                        );
                        // This can't fail -- we just checked for it!
                        assert!(inserted);
                        return SpillSlot::new(i as u32);
                    }
                    // Otherwise move on.
                }
                LogicalSpillSlot::InUse { .. } | LogicalSpillSlot::Unavail => {
                    // Slot isn't in use, or is in use but for values of some non-ref size.
                    // Move on.
                }
            }
        }
        // We tried all slots, but without success. Add a new one and try again. This time we
        // must succeed, since the new slot's tree is empty and can therefore accept `frag`.
        // Calling recursively is a bit stupid in the sense that we then search again to find
        // the slot we just allocated, but hey.
        self.add_new_slot(1 /*word*/);
        self.alloc_reftyped_spillslot_for_frag(frag) // \o/ tailcall \o/
    }

    // STACKMAP SUPPORT
    // Examine all the spill slots at `pt` and return those that are reftyped. This is
    // fundamentally what creates a stack map.
    pub fn get_reftyped_spillslots_at_inst_point(&self, pt: InstPoint) -> Vec<SpillSlot> {
        let mut res = Vec::<SpillSlot>::new();
        for (i, slot) in self.slots.iter().enumerate() {
            if slot.get_refness_at_inst_point(pt) == Some(true) {
                res.push(SpillSlot::new(i as u32));
            }
        }
        res
    }
}
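
// A minimal sanity test, sketched here for illustration, of the alignment
// behaviour of `add_new_slot` described above. It relies only on code in this
// file (`new`, `add_new_slot` and `num_slots_in_use`).
#[cfg(test)]
mod spillslot_alignment_tests {
    use super::*;

    #[test]
    fn add_new_slot_respects_alignment() {
        let mut ssa = SpillSlotAllocator::new();
        // The first size-1 slot lands at index 0; no padding is needed.
        assert_eq!(ssa.add_new_slot(1), 0);
        // A size-4 slot must start at an index that is 0 % 4, so indices
        // 1..=3 become `Unavail` padding and the slot lands at index 4.
        assert_eq!(ssa.add_new_slot(4), 4);
        // The size-4 slot also blocks out indices 5..=7.
        assert_eq!(ssa.num_slots_in_use(), 8);
        // A size-2 slot starts at index 8, which is already aligned.
        assert_eq!(ssa.add_new_slot(2), 8);
        assert_eq!(ssa.num_slots_in_use(), 10);
    }
}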