//! Alias analysis, consisting of a "last store" pass and a "memory
//! values" pass. These two passes operate as one fused pass, and so
//! are implemented together here.
//!
//! We partition memory state into several *disjoint pieces* of
//! "abstract state". There are a finite number of such pieces:
//! currently, we call them "heap", "table", "vmctx", and "other". Any
//! given address in memory belongs to exactly one disjoint piece.
//!
//! We never track which piece a concrete address belongs to at
//! runtime; this is a purely static concept. Instead, all
//! memory-accessing instructions (loads and stores) are labeled with
//! one of these four categories in the `MemFlags`. It is forbidden
//! for a load or store to access memory under one category and a
//! later load or store to access the same memory under a different
//! category. This is ensured to be true by construction during
//! frontend translation into CLIF and during legalization.
//!
//! Given that this non-aliasing property is ensured by the producer
//! of CLIF, we can compute a *may-alias* property: one load or store
//! may-alias another load or store if both access the same category
//! of abstract state.
//!
//! The "last store" pass helps to compute this aliasing: it scans the
//! code, finding at each program point the last instruction that
//! *might have* written to a given part of abstract state.
//!
//! We can't say for sure that the "last store" *did* actually write
//! that state, but we know for sure that no instruction *later* than
//! it (up to the current instruction) did. From this, we can derive
//! a must-alias property: if, at a given load or store, we
//! look backward to the "last store", *AND* we find that it has
//! exactly the same address expression and type, then we know that
//! the current instruction's access *must* be to the same memory
//! location.
//!
//! To get this must-alias property, we compute a sparse table of
//! "memory values": these are known equivalences between SSA `Value`s
//! and particular locations in memory. The memory-values table is a
//! mapping from (last store, address expression, type) to SSA
//! value. At a store, we can insert into this table directly. At a
//! load, we can also insert, if we don't already have a value (from
//! the store that produced the load's value).
//!
//! Then we can do two optimizations at once given this table. If a
//! load accesses a location identified by a (last store, address,
//! type) key already in the table, we replace it with the SSA value
//! for that memory location. This is usually known as "redundant load
//! elimination" if the value came from an earlier load of the same
//! location, or "store-to-load forwarding" if the value came from an
//! earlier store to the same location.
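//!
//! For example, in CLIF-like pseudocode (an illustrative sketch, not
//! exact CLIF syntax):
//!
//! ```text
//! store.i64 heap v2, v0+8   ;; becomes the last store for "heap";
//!                           ;; table gains (this store, v0, 8, i64) -> v2
//! v3 = load.i64 heap v0+8   ;; same key: store-to-load forwarding
//!                           ;; rewrites v3 as an alias of v2
//! v4 = load.i64 heap v0     ;; different offset, no hit; table gains
//!                           ;; (same store, v0, 0, i64) -> v4
//! v5 = load.i64 heap v0     ;; same key as v4: redundant load
//!                           ;; elimination rewrites v5 as an alias of v4
//! ```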
//!
//! In theory we could also do *dead-store elimination*, where if a
//! store overwrites a key in the table, *and* if no other load/store
//! to the abstract state category occurred, *and* no other trapping
//! instruction occurred (at which point we need an up-to-date memory
//! state because post-trap-termination memory state can be observed),
//! *and* we can prove the original store could not have trapped, then
//! we can eliminate the original store. Because this is so complex,
//! and the conditions for doing it correctly when post-trap state
//! must be correct likely reduce the potential benefit, we don't yet
//! do this.

use crate::{
    cursor::{Cursor, FuncCursor},
    dominator_tree::DominatorTree,
    fx::{FxHashMap, FxHashSet},
    inst_predicates::{
        has_memory_fence_semantics, inst_addr_offset_type, inst_store_data, visit_block_succs,
    },
    ir::{immediates::Offset32, Block, Function, Inst, Opcode, Type, Value},
    trace,
};
use cranelift_entity::{packed_option::PackedOption, EntityRef};

/// For a given program point, the last-store instruction, if any,
/// for each disjoint category of abstract state.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct LastStores {
    heap: PackedOption<Inst>,
    table: PackedOption<Inst>,
    vmctx: PackedOption<Inst>,
    other: PackedOption<Inst>,
}

impl LastStores {
    fn update(&mut self, func: &Function, inst: Inst) {
        let opcode = func.dfg.insts[inst].opcode();
        if has_memory_fence_semantics(opcode) {
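            // An instruction with fence semantics may affect any
            // memory, so it becomes the last store for every category.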
            self.heap = inst.into();
            self.table = inst.into();
            self.vmctx = inst.into();
            self.other = inst.into();
        } else if opcode.can_store() {
            if let Some(memflags) = func.dfg.insts[inst].memflags() {
                if memflags.heap() {
                    self.heap = inst.into();
                } else if memflags.table() {
                    self.table = inst.into();
                } else if memflags.vmctx() {
                    self.vmctx = inst.into();
                } else {
                    self.other = inst.into();
                }
            } else {
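                // A store without `MemFlags` gives us no category
                // information; conservatively clobber every category.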
                self.heap = inst.into();
                self.table = inst.into();
                self.vmctx = inst.into();
                self.other = inst.into();
            }
        }
    }

    fn get_last_store(&self, func: &Function, inst: Inst) -> PackedOption<Inst> {
        if let Some(memflags) = func.dfg.insts[inst].memflags() {
            if memflags.heap() {
                self.heap
            } else if memflags.table() {
                self.table
            } else if memflags.vmctx() {
                self.vmctx
            } else {
                self.other
            }
        } else if func.dfg.insts[inst].opcode().can_load()
            || func.dfg.insts[inst].opcode().can_store()
        {
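            // A load/store with no `MemFlags` could touch any memory;
            // treat the instruction itself as its own last store so
            // it can never unify with any other access.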
            inst.into()
        } else {
            PackedOption::default()
        }
    }

    fn meet_from(&mut self, other: &LastStores, loc: Inst) {
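        // Pointwise meet over the four categories. If both inputs
        // agree (including agreeing that no store has happened yet),
        // keep that version; on any disagreement, the memory version
        // depends on the incoming path, so we use `loc` (the first
        // instruction of the merge block) as a fresh version marker.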
        let meet = |a: PackedOption<Inst>, b: PackedOption<Inst>| -> PackedOption<Inst> {
            match (a.expand(), b.expand()) {
                (None, None) => None.into(),
                (Some(a), Some(b)) if a == b => a.into(),
                _ => loc.into(),
            }
        };

        self.heap = meet(self.heap, other.heap);
        self.table = meet(self.table, other.table);
        self.vmctx = meet(self.vmctx, other.vmctx);
        self.other = meet(self.other, other.other);
    }
}

/// A key identifying a unique memory location.
///
/// For the result of a load to be equivalent to the result of another
/// load, or the store data from a store, we need for (i) the
/// "version" of memory (here ensured by having the same last store
/// instruction to touch the disjoint category of abstract state we're
/// accessing); (ii) the address must be the same (here ensured by
/// having the same SSA value, which doesn't change after computed);
/// (iii) the offset must be the same; and (iv) the accessed type and
/// extension mode (e.g., 8-to-32, signed) must be the same.
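///
/// For example (an illustrative sketch): two 32-bit loads of `v1+8`
/// under the same last store map to the same key, so the second can
/// reuse the first's result, while an 8-to-32 extending load of the
/// same address maps to a different key via `extending_opcode`.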
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct MemoryLoc {
    last_store: PackedOption<Inst>,
    address: Value,
    offset: Offset32,
    ty: Type,
    /// We keep the *opcode* of the instruction that produced the
    /// value we record at this key if the opcode is anything other
    /// than an ordinary load or store. This is needed when we
    /// consider loads that extend the value: e.g., an 8-to-32
    /// sign-extending load will produce a 32-bit value from an 8-bit
    /// value in memory, so we can only reuse that (as part of RLE)
    /// for another load with the same extending opcode.
    ///
    /// We could improve the transform to insert explicit extend ops
    /// in place of extending loads when we know the memory value, but
    /// we haven't yet done this.
    extending_opcode: Option<Opcode>,
}

/// An alias-analysis pass.
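///
/// A minimal usage sketch (hypothetical driver code; assumes a built
/// `Function` and its computed `DominatorTree` are in scope):
///
/// ```ignore
/// let mut analysis = AliasAnalysis::new(&func, &domtree);
/// analysis.compute_and_update_aliases(&mut func);
/// ```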
pub struct AliasAnalysis<'a> {
    /// The domtree for the function.
    domtree: &'a DominatorTree,

    /// Input state to a basic block.
    block_input: FxHashMap<Block, LastStores>,

    /// Known memory-value equivalences. This is the result of the
    /// analysis. This is a mapping from (last store, address
    /// expression, offset, type) to SSA `Value`.
    ///
    /// We keep the defining inst around for quick dominance checks.
    mem_values: FxHashMap<MemoryLoc, (Inst, Value)>,
}

impl<'a> AliasAnalysis<'a> {
    /// Perform an alias analysis pass.
    pub fn new(func: &Function, domtree: &'a DominatorTree) -> AliasAnalysis<'a> {
        trace!("alias analysis: input is:\n{:?}", func);
        let mut analysis = AliasAnalysis {
            domtree,
            block_input: FxHashMap::default(),
            mem_values: FxHashMap::default(),
        };

        analysis.compute_block_input_states(func);
        analysis
    }

    fn compute_block_input_states(&mut self, func: &Function) {
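        // Forward dataflow fixpoint over the CFG: propagate each
        // block's out-state to its successors, meeting states at
        // merge points, and re-enqueue a successor whenever its
        // input state changes. `queue_set` deduplicates the worklist.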
        let mut queue = vec![];
        let mut queue_set = FxHashSet::default();
        let entry = func.layout.entry_block().unwrap();
        queue.push(entry);
        queue_set.insert(entry);

        while let Some(block) = queue.pop() {
            queue_set.remove(&block);
            let mut state = *self.block_input.entry(block).or_default();

            trace!(
                "alias analysis: input to block{} is {:?}",
                block.index(),
                state
            );

            for inst in func.layout.block_insts(block) {
                state.update(func, inst);
                trace!("after inst{}: state is {:?}", inst.index(), state);
            }

            visit_block_succs(func, block, |_inst, succ, _from_table| {
                let succ_first_inst = func.layout.block_insts(succ).into_iter().next().unwrap();
                let updated = match self.block_input.get_mut(&succ) {
                    Some(succ_state) => {
                        let old = succ_state.clone();
                        succ_state.meet_from(&state, succ_first_inst);
                        *succ_state != old
                    }
                    None => {
                        self.block_input.insert(succ, state.clone());
                        true
                    }
                };

                if updated && queue_set.insert(succ) {
                    queue.push(succ);
                }
            });
        }
    }

    /// Get the starting state for a block.
    pub fn block_starting_state(&self, block: Block) -> LastStores {
        self.block_input
            .get(&block)
            .copied()
            .unwrap_or_default()
    }

    /// Process one instruction. Meant to be invoked in program order
    /// within a block, and ideally in RPO or at least some domtree
    /// preorder for maximal reuse.
    ///
    /// Returns a replacement `Value` if the instruction's result is
    /// redundant: the caller may alias the result to that value and
    /// remove the instruction.
    pub fn process_inst(
        &mut self,
        func: &mut Function,
        state: &mut LastStores,
        inst: Inst,
    ) -> Option<Value> {
        trace!(
            "alias analysis: scanning at inst{} with state {:?} ({:?})",
            inst.index(),
            state,
            func.dfg.insts[inst],
        );

        let replacing_value = if let Some((address, offset, ty)) = inst_addr_offset_type(func, inst)
        {
            let address = func.dfg.resolve_aliases(address);
            let opcode = func.dfg.insts[inst].opcode();

            if opcode.can_store() {
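                // A store records its data in the memory-values table,
                // keyed by itself as the last store, enabling later
                // store-to-load forwarding from this location.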
                let store_data = inst_store_data(func, inst).unwrap();
                let store_data = func.dfg.resolve_aliases(store_data);
                let mem_loc = MemoryLoc {
                    last_store: inst.into(),
                    address,
                    offset,
                    ty,
                    extending_opcode: get_ext_opcode(opcode),
                };
                trace!(
                    "alias analysis: at inst{}: store with data v{} at loc {:?}",
                    inst.index(),
                    store_data.index(),
                    mem_loc
                );
                self.mem_values.insert(mem_loc, (inst, store_data));

                None
            } else if opcode.can_load() {
                let last_store = state.get_last_store(func, inst);
                let load_result = func.dfg.inst_results(inst)[0];
                let mem_loc = MemoryLoc {
                    last_store,
                    address,
                    offset,
                    ty,
                    extending_opcode: get_ext_opcode(opcode),
                };
                trace!(
                    "alias analysis: at inst{}: load with last_store inst{} at loc {:?}",
                    inst.index(),
                    last_store.map(|inst| inst.index()).unwrap_or(usize::MAX),
                    mem_loc
                );

                // Is there a value already known to be stored at this
                // specific memory location? If so, we can alias the
                // load result to that already-known value.
                //
                // We must check that the definition dominates this
                // location; it might not, if it comes from a load on
                // another path. (A store's entry always dominates,
                // provided its `last_store` survives through
                // meet-points to this use-site.)
                let aliased =
                    if let Some((def_inst, value)) = self.mem_values.get(&mem_loc).cloned() {
                        trace!(
                            " -> sees known value v{} from inst{}",
                            value.index(),
                            def_inst.index()
                        );
                        if self.domtree.dominates(def_inst, inst, &func.layout) {
                            trace!(
                                " -> dominates; value equiv from v{} to v{} inserted",
                                load_result.index(),
                                value.index()
                            );
                            Some(value)
                        } else {
                            None
                        }
                    } else {
                        None
                    };

                // Otherwise, we can keep *this* load around
                // as a new equivalent value.
                if aliased.is_none() {
                    trace!(
                        " -> inserting load result v{} at loc {:?}",
                        load_result.index(),
                        mem_loc
                    );
                    self.mem_values.insert(mem_loc, (inst, load_result));
                }

                aliased
            } else {
                None
            }
        } else {
            None
        };

        state.update(func, inst);

        replacing_value
    }

    /// Make a pass and update known-redundant loads to aliased
    /// values. We interleave the updates with the memory-location
    /// tracking because resolving some aliases may expose others
    /// (e.g. in cases of double-indirection with two separate chains
    /// of loads).
    pub fn compute_and_update_aliases(&mut self, func: &mut Function) {
        let mut pos = FuncCursor::new(func);

        while let Some(block) = pos.next_block() {
            let mut state = self.block_starting_state(block);
            while let Some(inst) = pos.next_inst() {
                if let Some(replaced_result) = self.process_inst(pos.func, &mut state, inst) {
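                    // The load is redundant: alias its result to the
                    // known value and delete the instruction.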
                    let result = pos.func.dfg.inst_results(inst)[0];
                    pos.func.dfg.detach_results(inst);
                    pos.func.dfg.change_to_alias(result, replaced_result);
                    pos.remove_inst_and_step_back();
                }
            }
        }
    }
}

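/// Distinguish extending loads/stores from plain ones: for anything
/// other than a plain `Load`/`Store`, record the opcode so that
/// accesses with different extension behavior never share a
/// `MemoryLoc` key.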
fn get_ext_opcode(op: Opcode) -> Option<Opcode> {
    debug_assert!(op.can_load() || op.can_store());
    match op {
        Opcode::Load | Opcode::Store => None,
        _ => Some(op),
    }
}