// cranelift_codegen/alias_analysis.rs

//! Alias analysis, consisting of a "last store" pass and a "memory
//! values" pass. These two passes operate as one fused pass, and so
//! are implemented together here.
//!
//! We partition memory state into several *disjoint pieces* of
//! "abstract state". There are a finite number of such pieces:
//! currently, we call them "heap", "table", "vmctx", and "other". Any
//! given address in memory belongs to exactly one disjoint piece.
//!
//! One never tracks which piece a concrete address belongs to at
//! runtime; this is a purely static concept. Instead, all
//! memory-accessing instructions (loads and stores) are labeled with
//! one of these four categories in the `MemFlags`. It is forbidden
//! for a load or store to access memory under one category and a
//! later load or store to access the same memory under a different
//! category. This is ensured to be true by construction during
//! frontend translation into CLIF and during legalization.
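//!
//! As an illustrative sketch (schematic CLIF; the value numbers and
//! exact flag syntax here are hypothetical), the two `heap`-labeled
//! accesses below may alias each other, but neither may alias the
//! `table`-labeled load:
//!
//! ```text
//! store heap v2, v0       ;; may alias the load of v3 below
//! v3 = load.i64 heap v0   ;; may alias the store above
//! v4 = load.i64 table v1  ;; disjoint category: aliases neither
//! ```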
//!
//! Given that this non-aliasing property is ensured by the producer
//! of CLIF, we can compute a *may-alias* property: one load or store
//! may-alias another load or store if both access the same category
//! of abstract state.
//!
//! The "last store" pass helps to compute this aliasing: it scans the
//! code, finding at each program point the last instruction that
//! *might have* written to a given part of abstract state.
//!
//! We can't say for sure that the "last store" *did* actually write
//! that state, but we know for sure that no instruction *later* than
//! it (up to the current instruction) did. However, we can get a
//! must-alias property from this: if at a given load or store, we
//! look backward to the "last store", *AND* we find that it has
//! exactly the same address expression and type, then we know that
//! the current instruction's access *must* be to the same memory
//! location.
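//!
//! For example (same schematic notation as above): at the load below,
//! the "last store" for the `heap` category is the preceding `store`;
//! because the address expression (`v0`) and the type (`i64`) match
//! exactly, the load must read the value the store wrote:
//!
//! ```text
//! store heap v2, v0       ;; the "last store" to `heap` from here on
//! v3 = load.i64 heap v0   ;; must-alias: v3 is known to equal v2
//! ```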
//!
//! To get this must-alias property, we compute a sparse table of
//! "memory values": these are known equivalences between SSA `Value`s
//! and particular locations in memory. The memory-values table is a
//! mapping from (last store, address expression, type) to SSA
//! value. At a store, we can insert into this table directly. At a
//! load, we can also insert, if we don't already have a value (from
//! the store that produced the load's value).
//!
//! Then we can do two optimizations at once given this table. If a
//! load accesses a location identified by a (last store, address,
//! type) key already in the table, we replace it with the SSA value
//! for that memory location. This is usually known as "redundant load
//! elimination" if the value came from an earlier load of the same
//! location, or "store-to-load forwarding" if the value came from an
//! earlier store to the same location.
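//!
//! A before/after sketch of both rewrites (schematic CLIF as above;
//! the pass itself rewrites load results to value aliases rather than
//! editing text):
//!
//! ```text
//! ;; Before:
//! store heap v2, v0       ;; becomes the "last store" for `heap`
//! v3 = load.i64 heap v0   ;; same key as the store: forwardable
//! v4 = load.i64 heap v0   ;; same key as the load of v3: redundant
//!
//! ;; After store-to-load forwarding (v3 -> v2) and redundant load
//! ;; elimination (v4 -> v3 -> v2), only the store remains:
//! store heap v2, v0
//! ```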
//!
//! In theory we could also do *dead-store elimination*, where if a
//! store overwrites a key in the table, *and* if no other load/store
//! to the abstract state category occurred, *and* no other trapping
//! instruction occurred (at which point we need an up-to-date memory
//! state because post-trap-termination memory state can be observed),
//! *and* we can prove the original store could not have trapped, then
//! we can eliminate the original store. Because this is so complex,
//! and the conditions for doing it correctly when post-trap state
//! must be correct likely reduce the potential benefit, we don't yet
//! do this.

use crate::{
    cursor::{Cursor, FuncCursor},
    dominator_tree::DominatorTree,
    fx::{FxHashMap, FxHashSet},
    inst_predicates::{
        has_memory_fence_semantics, inst_addr_offset_type, inst_store_data, visit_block_succs,
    },
    ir::{immediates::Offset32, Block, Function, Inst, Opcode, Type, Value},
    trace,
};
use cranelift_entity::{packed_option::PackedOption, EntityRef};

/// For a given program point, the last-store instruction for each
/// disjoint category of abstract state.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct LastStores {
    heap: PackedOption<Inst>,
    table: PackedOption<Inst>,
    vmctx: PackedOption<Inst>,
    other: PackedOption<Inst>,
}

impl LastStores {
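    /// Advance the state across `inst`: an instruction with memory
    /// fence semantics clobbers every category; a store clobbers the
    /// category named by its `MemFlags`, or, if it carries no flags,
    /// conservatively clobbers all categories.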
    fn update(&mut self, func: &Function, inst: Inst) {
        let opcode = func.dfg.insts[inst].opcode();
        if has_memory_fence_semantics(opcode) {
            self.heap = inst.into();
            self.table = inst.into();
            self.vmctx = inst.into();
            self.other = inst.into();
        } else if opcode.can_store() {
            if let Some(memflags) = func.dfg.insts[inst].memflags() {
                if memflags.heap() {
                    self.heap = inst.into();
                } else if memflags.table() {
                    self.table = inst.into();
                } else if memflags.vmctx() {
                    self.vmctx = inst.into();
                } else {
                    self.other = inst.into();
                }
            } else {
                self.heap = inst.into();
                self.table = inst.into();
                self.vmctx = inst.into();
                self.other = inst.into();
            }
        }
    }

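    /// Find the last store that may alias `inst`'s memory access,
    /// according to the category flag on `inst`. An access that
    /// carries no `MemFlags` is treated conservatively as its own
    /// last store.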
    fn get_last_store(&self, func: &Function, inst: Inst) -> PackedOption<Inst> {
        if let Some(memflags) = func.dfg.insts[inst].memflags() {
            if memflags.heap() {
                self.heap
            } else if memflags.table() {
                self.table
            } else if memflags.vmctx() {
                self.vmctx
            } else {
                self.other
            }
        } else if func.dfg.insts[inst].opcode().can_load()
            || func.dfg.insts[inst].opcode().can_store()
        {
            inst.into()
        } else {
            PackedOption::default()
        }
    }

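    /// Merge a predecessor's out-state into this block's in-state at
    /// a control-flow join. When the two sides disagree on the last
    /// store for a category, we conservatively record the join point
    /// itself (`loc`) as the last store, so memory-value equivalences
    /// from either side do not survive the merge.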
    fn meet_from(&mut self, other: &LastStores, loc: Inst) {
        let meet = |a: PackedOption<Inst>, b: PackedOption<Inst>| -> PackedOption<Inst> {
            match (a.into(), b.into()) {
                (None, None) => None.into(),
                (Some(a), None) => a,
                (None, Some(b)) => b,
                (Some(a), Some(b)) if a == b => a,
                _ => loc.into(),
            }
        };

        self.heap = meet(self.heap, other.heap);
        self.table = meet(self.table, other.table);
        self.vmctx = meet(self.vmctx, other.vmctx);
        self.other = meet(self.other, other.other);
    }
}

/// A key identifying a unique memory location.
///
/// For the result of a load to be equivalent to the result of another
/// load, or to the store data from a store, we need (i) the "version"
/// of memory to be the same (ensured here by having the same last
/// store instruction to touch the disjoint category of abstract state
/// we're accessing); (ii) the address to be the same (ensured here by
/// having the same SSA value, which doesn't change once computed);
/// (iii) the offset to be the same; and (iv) the accessed type and
/// extension mode (e.g., 8-to-32, signed) to be the same.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct MemoryLoc {
    last_store: PackedOption<Inst>,
    address: Value,
    offset: Offset32,
    ty: Type,
    /// We keep the *opcode* of the instruction that produced the
    /// value we record at this key if the opcode is anything other
    /// than an ordinary load or store. This is needed when we
    /// consider loads that extend the value: e.g., an 8-to-32
    /// sign-extending load will produce a 32-bit value from an 8-bit
    /// value in memory, so we can only reuse that (as part of RLE)
    /// for another load with the same extending opcode.
    ///
    /// We could improve the transform to insert explicit extend ops
    /// in place of extending loads when we know the memory value, but
    /// we haven't yet done this.
    extending_opcode: Option<Opcode>,
}

/// An alias-analysis pass.
pub struct AliasAnalysis<'a> {
    /// The domtree for the function.
    domtree: &'a DominatorTree,

    /// Input state to a basic block.
    block_input: FxHashMap<Block, LastStores>,

    /// Known memory-value equivalences. This is the result of the
    /// analysis: a mapping from (last store, address expression,
    /// offset, type) to SSA `Value`.
    ///
    /// We keep the defining inst around for quick dominance checks.
    mem_values: FxHashMap<MemoryLoc, (Inst, Value)>,
}

impl<'a> AliasAnalysis<'a> {
    /// Perform an alias analysis pass.
    pub fn new(func: &Function, domtree: &'a DominatorTree) -> AliasAnalysis<'a> {
        trace!("alias analysis: input is:\n{:?}", func);
        let mut analysis = AliasAnalysis {
            domtree,
            block_input: FxHashMap::default(),
            mem_values: FxHashMap::default(),
        };

        analysis.compute_block_input_states(func);
        analysis
    }

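    /// Compute the `LastStores` state at entry to every block with a
    /// standard forward dataflow fixpoint: starting from the entry
    /// block, propagate each block's end-of-block state into its
    /// successors, re-enqueuing a successor whenever the meet changes
    /// its input state.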
    fn compute_block_input_states(&mut self, func: &Function) {
        let mut queue = vec![];
        let mut queue_set = FxHashSet::default();
        let entry = func.layout.entry_block().unwrap();
        queue.push(entry);
        queue_set.insert(entry);

        while let Some(block) = queue.pop() {
            queue_set.remove(&block);
            let mut state = self.block_input.entry(block).or_default().clone();

            trace!(
                "alias analysis: input to block{} is {:?}",
                block.index(),
                state
            );

            for inst in func.layout.block_insts(block) {
                state.update(func, inst);
                trace!("after inst{}: state is {:?}", inst.index(), state);
            }

            visit_block_succs(func, block, |_inst, succ, _from_table| {
                let succ_first_inst = func.layout.block_insts(succ).into_iter().next().unwrap();
                let updated = match self.block_input.get_mut(&succ) {
                    Some(succ_state) => {
                        let old = succ_state.clone();
                        succ_state.meet_from(&state, succ_first_inst);
                        *succ_state != old
                    }
                    None => {
                        self.block_input.insert(succ, state.clone());
                        true
                    }
                };

                if updated && queue_set.insert(succ) {
                    queue.push(succ);
                }
            });
        }
    }

    /// Get the starting state for a block.
    pub fn block_starting_state(&self, block: Block) -> LastStores {
        self.block_input.get(&block).cloned().unwrap_or_default()
    }

    /// Process one instruction. Meant to be invoked in program order
    /// within a block, and ideally in RPO or at least some domtree
    /// preorder for maximal reuse.
    ///
    /// Returns a replacement `Value` if the instruction's result is
    /// known to be equal to an already-available value; the caller can
    /// then alias the result to that value and remove the instruction.
    pub fn process_inst(
        &mut self,
        func: &mut Function,
        state: &mut LastStores,
        inst: Inst,
    ) -> Option<Value> {
        trace!(
            "alias analysis: scanning at inst{} with state {:?} ({:?})",
            inst.index(),
            state,
            func.dfg.insts[inst],
        );

        let replacing_value = if let Some((address, offset, ty)) = inst_addr_offset_type(func, inst)
        {
            let address = func.dfg.resolve_aliases(address);
            let opcode = func.dfg.insts[inst].opcode();

            if opcode.can_store() {
                let store_data = inst_store_data(func, inst).unwrap();
                let store_data = func.dfg.resolve_aliases(store_data);
                let mem_loc = MemoryLoc {
                    last_store: inst.into(),
                    address,
                    offset,
                    ty,
                    extending_opcode: get_ext_opcode(opcode),
                };
                trace!(
                    "alias analysis: at inst{}: store with data v{} at loc {:?}",
                    inst.index(),
                    store_data.index(),
                    mem_loc
                );
                self.mem_values.insert(mem_loc, (inst, store_data));

                None
            } else if opcode.can_load() {
                let last_store = state.get_last_store(func, inst);
                let load_result = func.dfg.inst_results(inst)[0];
                let mem_loc = MemoryLoc {
                    last_store,
                    address,
                    offset,
                    ty,
                    extending_opcode: get_ext_opcode(opcode),
                };
                trace!(
                    "alias analysis: at inst{}: load with last_store inst{} at loc {:?}",
                    inst.index(),
                    last_store.map(|inst| inst.index()).unwrap_or(usize::MAX),
                    mem_loc
                );

                // Is there a Value already known to be stored at this
                // specific memory location? If so, we can alias the load
                // result to this already-known Value.
                //
                // Check if the definition dominates this location; it
                // might not, if it comes from a load (stores will always
                // dominate, though, if their `last_store` survives
                // through meet-points to this use-site).
                let aliased =
                    if let Some((def_inst, value)) = self.mem_values.get(&mem_loc).cloned() {
                        trace!(
                            " -> sees known value v{} from inst{}",
                            value.index(),
                            def_inst.index()
                        );
                        if self.domtree.dominates(def_inst, inst, &func.layout) {
                            trace!(
                                " -> dominates; value equiv from v{} to v{} inserted",
                                load_result.index(),
                                value.index()
                            );
                            Some(value)
                        } else {
                            None
                        }
                    } else {
                        None
                    };

                // Otherwise, we can keep *this* load around as a new
                // equivalent value.
                if aliased.is_none() {
                    trace!(
                        " -> inserting load result v{} at loc {:?}",
                        load_result.index(),
                        mem_loc
                    );
                    self.mem_values.insert(mem_loc, (inst, load_result));
                }

                aliased
            } else {
                None
            }
        } else {
            None
        };

        state.update(func, inst);

        replacing_value
    }

    /// Make a pass and update known-redundant loads to aliased
    /// values. We interleave the updates with the memory-location
    /// tracking because resolving some aliases may expose others
    /// (e.g. in cases of double-indirection with two separate chains
    /// of loads).
    pub fn compute_and_update_aliases(&mut self, func: &mut Function) {
        let mut pos = FuncCursor::new(func);

        while let Some(block) = pos.next_block() {
            let mut state = self.block_starting_state(block);
            while let Some(inst) = pos.next_inst() {
                if let Some(replaced_result) = self.process_inst(pos.func, &mut state, inst) {
                    let result = pos.func.dfg.inst_results(inst)[0];
                    pos.func.dfg.detach_results(inst);
                    pos.func.dfg.change_to_alias(result, replaced_result);
                    pos.remove_inst_and_step_back();
                }
            }
        }
    }
}

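/// Determine the "extending opcode" recorded in a `MemoryLoc` key:
/// `None` for a plain `load`/`store`, and the opcode itself for any
/// other memory access (e.g. an extending load such as `uload8`),
/// since values produced under different extension modes are not
/// interchangeable.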
fn get_ext_opcode(op: Opcode) -> Option<Opcode> {
    debug_assert!(op.can_load() || op.can_store());
    match op {
        Opcode::Load | Opcode::Store => None,
        _ => Some(op),
    }
}