1 //! Debug info analysis: computes value-label ranges from value-label markers in
2 //! generated VCode.
3 //!
4 //! We "reverse-engineer" debug info like this because it is far more reliable
5 //! than generating it while emitting code and keeping it in sync.
6 //!
7 //! This works by (i) observing "value-label marker" instructions, which are
8 //! semantically just an assignment from a register to a "value label" (which
9 //! one can think of as another register; they represent, e.g., Wasm locals) at
10 //! a certain point in the code, and (ii) observing loads and stores to the
11 //! stack and register moves.
12 //!
13 //! We track, at every program point, the correspondence between each value
14 //! label and *all* locations in which it resides. E.g., if it is stored to the
15 //! stack, we remember that it is in both a register and the stack slot; but if
16 //! the register is later overwritten, then we have it just in the stack slot.
17 //! This allows us to avoid false-positives observing loads/stores that we think
18 //! are spillslots but really aren't.
19 //!
20 //! We do a standard forward dataflow analysis to compute this info.
21 
22 use crate::ir::ValueLabel;
23 use crate::machinst::*;
24 use crate::value_label::{LabelValueLoc, ValueLabelsRanges, ValueLocRange};
25 use log::trace;
26 use regalloc::{Reg, RegUsageCollector};
27 use std::collections::{HashMap, HashSet};
28 use std::hash::Hash;
29 
30 /// Location of a labeled value: in a register or in a stack slot. Note that a
31 /// value may live in more than one location; `AnalysisInfo` maps each
32 /// value-label to multiple `ValueLoc`s.
33 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
34 enum ValueLoc {
35     Reg(Reg),
36     /// Nominal-SP offset.
37     Stack(i64),
38 }
39 
40 impl From<ValueLoc> for LabelValueLoc {
from(v: ValueLoc) -> Self41     fn from(v: ValueLoc) -> Self {
42         match v {
43             ValueLoc::Reg(r) => LabelValueLoc::Reg(r),
44             ValueLoc::Stack(off) => LabelValueLoc::SPOffset(off),
45         }
46     }
47 }
48 
49 impl ValueLoc {
is_reg(self) -> bool50     fn is_reg(self) -> bool {
51         match self {
52             ValueLoc::Reg(_) => true,
53             _ => false,
54         }
55     }
is_stack(self) -> bool56     fn is_stack(self) -> bool {
57         match self {
58             ValueLoc::Stack(_) => true,
59             _ => false,
60         }
61     }
62 }
63 
64 /// Mappings at one program point.
65 #[derive(Clone, Debug)]
66 struct AnalysisInfo {
67     /// Nominal SP relative to real SP. If `None`, then the offset is
68     /// indeterminate (i.e., we merged to the lattice 'bottom' element). This
69     /// should not happen in well-formed code.
70     nominal_sp_offset: Option<i64>,
71     /// Forward map from labeled values to sets of locations.
72     label_to_locs: HashMap<ValueLabel, HashSet<ValueLoc>>,
73     /// Reverse map for each register indicating the value it holds, if any.
74     reg_to_label: HashMap<Reg, ValueLabel>,
75     /// Reverse map for each stack offset indicating the value it holds, if any.
76     stack_to_label: HashMap<i64, ValueLabel>,
77 }
78 
79 /// Get the registers written (mod'd or def'd) by a machine instruction.
get_inst_writes<M: MachInst>(m: &M) -> Vec<Reg>80 fn get_inst_writes<M: MachInst>(m: &M) -> Vec<Reg> {
81     // TODO: expose this part of regalloc.rs's interface publicly.
82     let mut vecs = RegUsageCollector::get_empty_reg_vecs_test_framework_only(false);
83     let mut coll = RegUsageCollector::new(&mut vecs);
84     m.get_regs(&mut coll);
85     vecs.defs.extend(vecs.mods.into_iter());
86     vecs.defs
87 }
88 
89 impl AnalysisInfo {
90     /// Create a new analysis state. This is the "top" lattice element at which
91     /// the fixpoint dataflow analysis starts.
new() -> Self92     fn new() -> Self {
93         AnalysisInfo {
94             nominal_sp_offset: Some(0),
95             label_to_locs: HashMap::new(),
96             reg_to_label: HashMap::new(),
97             stack_to_label: HashMap::new(),
98         }
99     }
100 
101     /// Remove all locations for a given labeled value. Used when the labeled
102     /// value is redefined (so old values become stale).
clear_label(&mut self, label: ValueLabel)103     fn clear_label(&mut self, label: ValueLabel) {
104         if let Some(locs) = self.label_to_locs.remove(&label) {
105             for loc in locs {
106                 match loc {
107                     ValueLoc::Reg(r) => {
108                         self.reg_to_label.remove(&r);
109                     }
110                     ValueLoc::Stack(off) => {
111                         self.stack_to_label.remove(&off);
112                     }
113                 }
114             }
115         }
116     }
117 
118     /// Remove a label from a register, if any. Used, e.g., if the register is
119     /// overwritten.
clear_reg(&mut self, reg: Reg)120     fn clear_reg(&mut self, reg: Reg) {
121         if let Some(label) = self.reg_to_label.remove(&reg) {
122             if let Some(locs) = self.label_to_locs.get_mut(&label) {
123                 locs.remove(&ValueLoc::Reg(reg));
124             }
125         }
126     }
127 
128     /// Remove a label from a stack offset, if any. Used, e.g., when the stack
129     /// slot is overwritten.
clear_stack_off(&mut self, off: i64)130     fn clear_stack_off(&mut self, off: i64) {
131         if let Some(label) = self.stack_to_label.remove(&off) {
132             if let Some(locs) = self.label_to_locs.get_mut(&label) {
133                 locs.remove(&ValueLoc::Stack(off));
134             }
135         }
136     }
137 
138     /// Indicate that a labeled value is newly defined and its new value is in
139     /// `reg`.
def_label_at_reg(&mut self, label: ValueLabel, reg: Reg)140     fn def_label_at_reg(&mut self, label: ValueLabel, reg: Reg) {
141         self.clear_label(label);
142         self.label_to_locs
143             .entry(label)
144             .or_insert_with(|| HashSet::new())
145             .insert(ValueLoc::Reg(reg));
146         self.reg_to_label.insert(reg, label);
147     }
148 
149     /// Process a store from a register to a stack slot (offset).
store_reg(&mut self, reg: Reg, off: i64)150     fn store_reg(&mut self, reg: Reg, off: i64) {
151         self.clear_stack_off(off);
152         if let Some(label) = self.reg_to_label.get(&reg) {
153             if let Some(locs) = self.label_to_locs.get_mut(label) {
154                 locs.insert(ValueLoc::Stack(off));
155             }
156             self.stack_to_label.insert(off, *label);
157         }
158     }
159 
160     /// Process a load from a stack slot (offset) to a register.
load_reg(&mut self, reg: Reg, off: i64)161     fn load_reg(&mut self, reg: Reg, off: i64) {
162         self.clear_reg(reg);
163         if let Some(&label) = self.stack_to_label.get(&off) {
164             if let Some(locs) = self.label_to_locs.get_mut(&label) {
165                 locs.insert(ValueLoc::Reg(reg));
166             }
167             self.reg_to_label.insert(reg, label);
168         }
169     }
170 
171     /// Process a move from one register to another.
move_reg(&mut self, to: Reg, from: Reg)172     fn move_reg(&mut self, to: Reg, from: Reg) {
173         self.clear_reg(to);
174         if let Some(&label) = self.reg_to_label.get(&from) {
175             if let Some(locs) = self.label_to_locs.get_mut(&label) {
176                 locs.insert(ValueLoc::Reg(to));
177             }
178             self.reg_to_label.insert(to, label);
179         }
180     }
181 
182     /// Update the analysis state w.r.t. an instruction's effects. Given the
183     /// state just before `inst`, this method updates `self` to be the state
184     /// just after `inst`.
step<M: MachInst>(&mut self, inst: &M)185     fn step<M: MachInst>(&mut self, inst: &M) {
186         for write in get_inst_writes(inst) {
187             self.clear_reg(write);
188         }
189         if let Some((label, reg)) = inst.defines_value_label() {
190             self.def_label_at_reg(label, reg);
191         }
192         match inst.stack_op_info() {
193             Some(MachInstStackOpInfo::LoadNomSPOff(reg, offset)) => {
194                 self.load_reg(reg, offset + self.nominal_sp_offset.unwrap());
195             }
196             Some(MachInstStackOpInfo::StoreNomSPOff(reg, offset)) => {
197                 self.store_reg(reg, offset + self.nominal_sp_offset.unwrap());
198             }
199             Some(MachInstStackOpInfo::NomSPAdj(offset)) => {
200                 if self.nominal_sp_offset.is_some() {
201                     self.nominal_sp_offset = Some(self.nominal_sp_offset.unwrap() + offset);
202                 }
203             }
204             _ => {}
205         }
206         if let Some((to, from)) = inst.is_move() {
207             let to = to.to_reg();
208             self.move_reg(to, from);
209         }
210     }
211 }
212 
213 /// Trait used to implement the dataflow analysis' meet (intersect) function
214 /// onthe `AnalysisInfo` components. For efficiency, this is implemented as a
215 /// mutation on the LHS, rather than a pure functional operation.
216 trait IntersectFrom {
intersect_from(&mut self, other: &Self) -> IntersectResult217     fn intersect_from(&mut self, other: &Self) -> IntersectResult;
218 }
219 
220 /// Result of an intersection operation. Indicates whether the mutated LHS
221 /// (which becomes the intersection result) differs from the original LHS. Also
222 /// indicates if the value has become "empty" and should be removed from a
223 /// parent container, if any.
224 struct IntersectResult {
225     /// Did the intersection change the LHS input (the one that was mutated into
226     /// the result)? This is needed to drive the fixpoint loop; when no more
227     /// changes occur, then we have converted.
228     changed: bool,
229     /// Is the resulting value "empty"? This can be used when a container, such
230     /// as a map, holds values of this (intersection result) type; when
231     /// `is_empty` is true for the merge of the values at a particular key, we
232     /// can remove that key from the merged (intersected) result. This is not
233     /// necessary for analysis correctness but reduces the memory and runtime
234     /// cost of the fixpoint loop.
235     is_empty: bool,
236 }
237 
238 impl IntersectFrom for AnalysisInfo {
intersect_from(&mut self, other: &Self) -> IntersectResult239     fn intersect_from(&mut self, other: &Self) -> IntersectResult {
240         let mut changed = false;
241         changed |= self
242             .nominal_sp_offset
243             .intersect_from(&other.nominal_sp_offset)
244             .changed;
245         changed |= self
246             .label_to_locs
247             .intersect_from(&other.label_to_locs)
248             .changed;
249         changed |= self
250             .reg_to_label
251             .intersect_from(&other.reg_to_label)
252             .changed;
253         changed |= self
254             .stack_to_label
255             .intersect_from(&other.stack_to_label)
256             .changed;
257         IntersectResult {
258             changed,
259             is_empty: false,
260         }
261     }
262 }
263 
264 impl<K, V> IntersectFrom for HashMap<K, V>
265 where
266     K: Copy + Eq + Hash,
267     V: IntersectFrom,
268 {
269     /// Intersection for hashmap: remove keys that are not in both inputs;
270     /// recursively intersect values for keys in common.
intersect_from(&mut self, other: &Self) -> IntersectResult271     fn intersect_from(&mut self, other: &Self) -> IntersectResult {
272         let mut changed = false;
273         let mut remove_keys = vec![];
274         for k in self.keys() {
275             if !other.contains_key(k) {
276                 remove_keys.push(*k);
277             }
278         }
279         for k in &remove_keys {
280             changed = true;
281             self.remove(k);
282         }
283 
284         remove_keys.clear();
285         for k in other.keys() {
286             if let Some(v) = self.get_mut(k) {
287                 let result = v.intersect_from(other.get(k).unwrap());
288                 changed |= result.changed;
289                 if result.is_empty {
290                     remove_keys.push(*k);
291                 }
292             }
293         }
294         for k in &remove_keys {
295             changed = true;
296             self.remove(k);
297         }
298 
299         IntersectResult {
300             changed,
301             is_empty: self.len() == 0,
302         }
303     }
304 }
305 impl<T> IntersectFrom for HashSet<T>
306 where
307     T: Copy + Eq + Hash,
308 {
309     /// Intersection for hashset: just take the set intersection.
intersect_from(&mut self, other: &Self) -> IntersectResult310     fn intersect_from(&mut self, other: &Self) -> IntersectResult {
311         let mut changed = false;
312         let mut remove = vec![];
313         for val in self.iter() {
314             if !other.contains(val) {
315                 remove.push(*val);
316             }
317         }
318         for val in remove {
319             changed = true;
320             self.remove(&val);
321         }
322 
323         IntersectResult {
324             changed,
325             is_empty: self.len() == 0,
326         }
327     }
328 }
329 impl IntersectFrom for ValueLabel {
330     // Intersection for labeled value: remove if not equal. This is equivalent
331     // to a three-level lattice with top, bottom, and unordered set of
332     // individual labels in between.
intersect_from(&mut self, other: &Self) -> IntersectResult333     fn intersect_from(&mut self, other: &Self) -> IntersectResult {
334         IntersectResult {
335             changed: false,
336             is_empty: *self != *other,
337         }
338     }
339 }
340 impl<T> IntersectFrom for Option<T>
341 where
342     T: Copy + Eq,
343 {
344     /// Intersectino for Option<T>: recursively intersect if both `Some`, else
345     /// `None`.
intersect_from(&mut self, other: &Self) -> IntersectResult346     fn intersect_from(&mut self, other: &Self) -> IntersectResult {
347         let mut changed = false;
348         if !(self.is_some() && other.is_some() && self == other) {
349             changed = true;
350             *self = None;
351         }
352         IntersectResult {
353             changed,
354             is_empty: self.is_none(),
355         }
356     }
357 }
358 
359 /// Compute the value-label ranges (locations for program-point ranges for
360 /// labeled values) from a given `VCode` compilation result.
361 ///
362 /// In order to compute this information, we perform a dataflow analysis on the
363 /// machine code. To do so, and translate the results into a form usable by the
364 /// debug-info consumers, we need to know two additional things:
365 ///
366 /// - The machine-code layout (code offsets) of the instructions. DWARF is
367 ///   encoded in terms of instruction *ends* (and we reason about value
368 ///   locations at program points *after* instructions, to match this), so we
369 ///   take an array `inst_ends`, giving us code offsets for each instruction's
370 ///   end-point. (Note that this is one *past* the last byte; so a 4-byte
371 ///   instruction at offset 0 has an end offset of 4.)
372 ///
373 /// - The locations of the labels to which branches will jump. Branches can tell
374 ///   us about their targets in terms of `MachLabel`s, but we don't know where
375 ///   those `MachLabel`s will be placed in the linear array of instructions.  We
376 ///   take the array `label_insn_index` to provide this info: for a label with
377 ///   index `l`, `label_insn_index[l]` is the index of the instruction before
378 ///   which that label is bound.
compute<I: VCodeInst>( insts: &[I], inst_ends: &[u32], label_insn_index: &[u32], ) -> ValueLabelsRanges379 pub(crate) fn compute<I: VCodeInst>(
380     insts: &[I],
381     inst_ends: &[u32],
382     label_insn_index: &[u32],
383 ) -> ValueLabelsRanges {
384     let inst_start = |idx: usize| if idx == 0 { 0 } else { inst_ends[idx - 1] };
385 
386     trace!("compute: insts =");
387     for i in 0..insts.len() {
388         trace!(" #{} end: {} -> {:?}", i, inst_ends[i], insts[i]);
389     }
390     trace!("label_insn_index: {:?}", label_insn_index);
391 
392     // Info at each block head, indexed by label.
393     let mut block_starts: HashMap<u32, AnalysisInfo> = HashMap::new();
394 
395     // Initialize state at entry.
396     block_starts.insert(0, AnalysisInfo::new());
397 
398     // Worklist: label indices for basic blocks.
399     let mut worklist = Vec::new();
400     let mut worklist_set = HashSet::new();
401     worklist.push(0);
402     worklist_set.insert(0);
403 
404     while !worklist.is_empty() {
405         let block = worklist.pop().unwrap();
406         worklist_set.remove(&block);
407 
408         let mut state = block_starts.get(&block).unwrap().clone();
409         trace!("at block {} -> state: {:?}", block, state);
410         // Iterate for each instruction in the block (we break at the first
411         // terminator we see).
412         let mut index = label_insn_index[block as usize];
413         while index < insts.len() as u32 {
414             state.step(&insts[index as usize]);
415             trace!(" -> inst #{}: {:?}", index, insts[index as usize]);
416             trace!("    --> state: {:?}", state);
417 
418             let term = insts[index as usize].is_term();
419             if term.is_term() {
420                 for succ in term.get_succs() {
421                     trace!("    SUCCESSOR block {}", succ.get());
422                     if let Some(succ_state) = block_starts.get_mut(&succ.get()) {
423                         trace!("       orig state: {:?}", succ_state);
424                         if succ_state.intersect_from(&state).changed {
425                             if worklist_set.insert(succ.get()) {
426                                 worklist.push(succ.get());
427                             }
428                             trace!("        (changed)");
429                         }
430                         trace!("       new state: {:?}", succ_state);
431                     } else {
432                         // First time seeing this block
433                         block_starts.insert(succ.get(), state.clone());
434                         worklist.push(succ.get());
435                         worklist_set.insert(succ.get());
436                     }
437                 }
438                 break;
439             }
440 
441             index += 1;
442         }
443     }
444 
445     // Now iterate over blocks one last time, collecting
446     // value-label locations.
447 
448     let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
449     for block in 0..label_insn_index.len() {
450         let start_index = label_insn_index[block];
451         let end_index = if block == label_insn_index.len() - 1 {
452             insts.len() as u32
453         } else {
454             label_insn_index[block + 1]
455         };
456         let block = block as u32;
457         let mut state = block_starts.get(&block).unwrap().clone();
458         for index in start_index..end_index {
459             let offset = inst_start(index as usize);
460             let end = inst_ends[index as usize];
461             state.step(&insts[index as usize]);
462 
463             for (label, locs) in &state.label_to_locs {
464                 trace!("   inst {} has label {:?} -> locs {:?}", index, label, locs);
465                 // Find an appropriate loc: a register if possible, otherwise pick the first stack
466                 // loc.
467                 let reg = locs.iter().cloned().find(|l| l.is_reg());
468                 let loc = reg.or_else(|| locs.iter().cloned().find(|l| l.is_stack()));
469                 if let Some(loc) = loc {
470                     let loc = LabelValueLoc::from(loc);
471                     let list = value_labels_ranges.entry(*label).or_insert_with(|| vec![]);
472                     // If the existing location list for this value-label is
473                     // either empty, or has an end location that does not extend
474                     // to the current offset, then we have to append a new
475                     // entry. Otherwise, we can extend the current entry.
476                     //
477                     // Note that `end` is one past the end of the instruction;
478                     // it appears that `end` is exclusive, so a mapping valid at
479                     // offset 5 will have start = 5, end = 6.
480                     if list
481                         .last()
482                         .map(|last| last.end <= offset || last.loc != loc)
483                         .unwrap_or(true)
484                     {
485                         list.push(ValueLocRange {
486                             loc,
487                             start: end,
488                             end: end + 1,
489                         });
490                     } else {
491                         list.last_mut().unwrap().end = end + 1;
492                     }
493                 }
494             }
495         }
496     }
497 
498     trace!("ret: {:?}", value_labels_ranges);
499     value_labels_ranges
500 }
501