cranelift_codegen/remove_constant_phis.rs
1//! A Constant-Phi-Node removal pass.
2
3use crate::dominator_tree::DominatorTree;
4use crate::fx::FxHashMap;
5use crate::fx::FxHashSet;
6use crate::ir;
7use crate::ir::Function;
8use crate::ir::{Block, BlockCall, Inst, Value};
9use crate::timing;
10use bumpalo::Bump;
11use cranelift_entity::SecondaryMap;
12use smallvec::SmallVec;
13
14// A note on notation. For the sake of clarity, this file uses the phrase
15// "formal parameters" to mean the `Value`s listed in the block head, and
16// "actual parameters" to mean the `Value`s passed in a branch or a jump:
17//
18// block4(v16: i32, v18: i32): <-- formal parameters
19// ...
20// brif v27, block7(v22, v24), block6 <-- actual parameters
21
22// This transformation pass (conceptually) partitions all values in the
23// function into two groups:
24//
25// * Group A: values defined by block formal parameters, except for the entry block.
26//
27// * Group B: All other values: that is, values defined by instructions,
28// and the formals of the entry block.
29//
30// For each value in Group A, it attempts to establish whether it will have
31// the value of exactly one member of Group B. If so, the formal parameter is
32// deleted, all corresponding actual parameters (in jumps/branches to the
33// defining block) are deleted, and a rename is inserted.
34//
35// The entry block is special-cased because (1) we don't know what values flow
36// to its formals and (2) in any case we can't change its formals.
37//
38// Work proceeds in three phases.
39//
40// * Phase 1: examine all instructions. For each block, make up a useful
41// grab-bag of information, `BlockSummary`, that summarises the block's
42// formals and jump/branch instruction. This is used by Phases 2 and 3.
43//
44// * Phase 2: for each value in Group A, try to find a single Group B value
45// that flows to it. This is done using a classical iterative forward
46// dataflow analysis over a simple constant-propagation style lattice. It
47// converges quickly in practice -- I have seen at most 4 iterations. This
48// is relatively cheap because the iteration is done over the
49// `BlockSummary`s, and does not visit each instruction. The resulting
50// fixed point is stored in a `SolverState`.
51//
52// * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to
53// remove redundant formals and actuals, and to insert suitable renames.
54//
55// Note that the effectiveness of the analysis depends on on the fact that
56// there are no copy instructions in Cranelift's IR. If there were, the
57// computation of `actual_absval` in Phase 2 would have to be extended to
58// chase through such copies.
59//
60// For large functions, the analysis cost using the new AArch64 backend is about
61// 0.6% of the non-optimising compile time, as measured by instruction counts.
62// This transformation usually pays for itself several times over, though, by
63// reducing the isel/regalloc cost downstream. Gains of up to 7% have been
64// seen for large functions.
65
66/// The `Value`s (Group B) that can flow to a formal parameter (Group A).
67#[derive(Clone, Copy, Debug, PartialEq)]
68enum AbstractValue {
69 /// Two or more values flow to this formal.
70 Many,
71
72 /// Exactly one value, as stated, flows to this formal. The `Value`s that
73 /// can appear here are exactly: `Value`s defined by `Inst`s, plus the
74 /// `Value`s defined by the formals of the entry block. Note that this is
75 /// exactly the set of `Value`s that are *not* tracked in the solver below
76 /// (see `SolverState`).
77 One(Value /*Group B*/),
78
79 /// No value flows to this formal.
80 None,
81}
82
83impl AbstractValue {
84 fn join(self, other: AbstractValue) -> AbstractValue {
85 match (self, other) {
86 // Joining with `None` has no effect
87 (AbstractValue::None, p2) => p2,
88 (p1, AbstractValue::None) => p1,
89 // Joining with `Many` produces `Many`
90 (AbstractValue::Many, _p2) => AbstractValue::Many,
91 (_p1, AbstractValue::Many) => AbstractValue::Many,
92 // The only interesting case
93 (AbstractValue::One(v1), AbstractValue::One(v2)) => {
94 if v1 == v2 {
95 AbstractValue::One(v1)
96 } else {
97 AbstractValue::Many
98 }
99 }
100 }
101 }
102
103 fn is_one(self) -> bool {
104 matches!(self, AbstractValue::One(_))
105 }
106}
107
108#[derive(Clone, Copy, Debug)]
109struct OutEdge<'a> {
110 /// An instruction that transfers control.
111 inst: Inst,
112 /// The index into branch_destinations for this instruction that corresponds
113 /// to this edge.
114 branch_index: u32,
115 /// The block that control is transferred to.
116 block: Block,
117 /// The arguments to that block.
118 ///
119 /// These values can be from both groups A and B.
120 args: &'a [Value],
121}
122
123impl<'a> OutEdge<'a> {
124 /// Construct a new `OutEdge` for the given instruction.
125 ///
126 /// Returns `None` if this is an edge without any block arguments, which
127 /// means we can ignore it for this analysis's purposes.
128 #[inline]
129 fn new(
130 bump: &'a Bump,
131 dfg: &ir::DataFlowGraph,
132 inst: Inst,
133 branch_index: usize,
134 block: BlockCall,
135 ) -> Option<Self> {
136 let inst_var_args = block.args_slice(&dfg.value_lists);
137
138 // Skip edges without params.
139 if inst_var_args.is_empty() {
140 return None;
141 }
142
143 Some(OutEdge {
144 inst,
145 branch_index: branch_index as u32,
146 block: block.block(&dfg.value_lists),
147 args: bump.alloc_slice_fill_iter(
148 inst_var_args
149 .iter()
150 .map(|value| dfg.resolve_aliases(*value)),
151 ),
152 })
153 }
154}
155
156/// For some block, a useful bundle of info. The `Block` itself is not stored
157/// here since it will be the key in the associated `FxHashMap` -- see
158/// `summaries` below. For the `SmallVec` tuning params: most blocks have
159/// few parameters, hence `4`. And almost all blocks have either one or two
160/// successors, hence `2`.
161#[derive(Clone, Debug, Default)]
162struct BlockSummary<'a> {
163 /// Formal parameters for this `Block`.
164 ///
165 /// These values are from group A.
166 formals: &'a [Value],
167
168 /// Each outgoing edge from this block.
169 ///
170 /// We don't bother to include transfers that pass zero parameters
171 /// since that makes more work for the solver for no purpose.
172 ///
173 /// We optimize for the case where a branch instruction has up to two
174 /// outgoing edges, as unconditional jumps and conditional branches are
175 /// more prominent than br_table.
176 dests: SmallVec<[OutEdge<'a>; 2]>,
177}
178
179impl<'a> BlockSummary<'a> {
180 /// Construct a new `BlockSummary`, using `values` as its backing storage.
181 #[inline]
182 fn new(bump: &'a Bump, formals: &[Value]) -> Self {
183 Self {
184 formals: bump.alloc_slice_copy(formals),
185 dests: Default::default(),
186 }
187 }
188}
189
190/// Solver state. This holds a AbstractValue for each formal parameter, except
191/// for those from the entry block.
192struct SolverState {
193 absvals: FxHashMap<Value /*Group A*/, AbstractValue>,
194}
195
196impl SolverState {
197 fn new() -> Self {
198 Self {
199 absvals: FxHashMap::default(),
200 }
201 }
202
203 fn get(&self, actual: Value) -> AbstractValue {
204 *self
205 .absvals
206 .get(&actual)
207 .unwrap_or_else(|| panic!("SolverState::get: formal param {:?} is untracked?!", actual))
208 }
209
210 fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {
211 self.absvals.get(&actual)
212 }
213
214 fn set(&mut self, actual: Value, lp: AbstractValue) {
215 match self.absvals.insert(actual, lp) {
216 Some(_old_lp) => {}
217 None => panic!("SolverState::set: formal param {:?} is untracked?!", actual),
218 }
219 }
220}
221
222/// Detect phis in `func` that will only ever produce one value, using a
223/// classic forward dataflow analysis. Then remove them.
224#[inline(never)]
225pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {
226 let _tt = timing::remove_constant_phis();
227 debug_assert!(domtree.is_valid());
228
229 // Phase 1 of 3: for each block, make a summary containing all relevant
230 // info. The solver will iterate over the summaries, rather than having
231 // to inspect each instruction in each block.
232 let bump =
233 Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::<Value>());
234 let mut summaries =
235 SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len());
236
237 for b in domtree.cfg_postorder().iter().rev().copied() {
238 let formals = func.dfg.block_params(b);
239 let mut summary = BlockSummary::new(&bump, formals);
240
241 for inst in func.layout.block_insts(b) {
242 for (ix, dest) in func.dfg.insts[inst]
243 .branch_destination(&func.dfg.jump_tables)
244 .iter()
245 .enumerate()
246 {
247 if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, ix, *dest) {
248 summary.dests.push(edge);
249 }
250 }
251 }
252
253 // Ensure the invariant that all blocks (except for the entry) appear
254 // in the summary, *unless* they have neither formals nor any
255 // param-carrying branches/jumps.
256 if formals.len() > 0 || summary.dests.len() > 0 {
257 summaries[b] = summary;
258 }
259 }
260
261 // Phase 2 of 3: iterate over the summaries in reverse postorder,
262 // computing new `AbstractValue`s for each tracked `Value`. The set of
263 // tracked `Value`s is exactly Group A as described above.
264
265 let entry_block = func
266 .layout
267 .entry_block()
268 .expect("remove_constant_phis: entry block unknown");
269
270 // Set up initial solver state
271 let mut state = SolverState::new();
272
273 for b in domtree.cfg_postorder().iter().rev().copied() {
274 // For each block, get the formals
275 if b == entry_block {
276 continue;
277 }
278 let formals = func.dfg.block_params(b);
279 for formal in formals {
280 let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);
281 assert!(mb_old_absval.is_none());
282 }
283 }
284
285 // Solve: repeatedly traverse the blocks in reverse postorder, until there
286 // are no changes.
287 let mut iter_no = 0;
288 loop {
289 iter_no += 1;
290 let mut changed = false;
291
292 for src in domtree.cfg_postorder().iter().rev().copied() {
293 let src_summary = &summaries[src];
294 for edge in &src_summary.dests {
295 assert!(edge.block != entry_block);
296 // By contrast, the dst block must have a summary. Phase 1
297 // will have only included an entry in `src_summary.dests` if
298 // that branch/jump carried at least one parameter. So the
299 // dst block does take parameters, so it must have a summary.
300 let dst_summary = &summaries[edge.block];
301 let dst_formals = &dst_summary.formals;
302 assert_eq!(edge.args.len(), dst_formals.len());
303 for (formal, actual) in dst_formals.iter().zip(edge.args) {
304 // Find the abstract value for `actual`. If it is a block
305 // formal parameter then the most recent abstract value is
306 // to be found in the solver state. If not, then it's a
307 // real value defining point (not a phi), in which case
308 // return it itself.
309 let actual_absval = match state.maybe_get(*actual) {
310 Some(pt) => *pt,
311 None => AbstractValue::One(*actual),
312 };
313
314 // And `join` the new value with the old.
315 let formal_absval_old = state.get(*formal);
316 let formal_absval_new = formal_absval_old.join(actual_absval);
317 if formal_absval_new != formal_absval_old {
318 changed = true;
319 state.set(*formal, formal_absval_new);
320 }
321 }
322 }
323 }
324
325 if !changed {
326 break;
327 }
328 }
329
330 let mut n_consts = 0;
331 for absval in state.absvals.values() {
332 if absval.is_one() {
333 n_consts += 1;
334 }
335 }
336
337 // Phase 3 of 3: edit the function to remove constant formals, using the
338 // summaries and the final solver state as a guide.
339
340 // Make up a set of blocks that need editing.
341 let mut need_editing = FxHashSet::<Block>::default();
342 for (block, summary) in summaries.iter() {
343 if block == entry_block {
344 continue;
345 }
346 for formal in summary.formals {
347 let formal_absval = state.get(*formal);
348 if formal_absval.is_one() {
349 need_editing.insert(block);
350 break;
351 }
352 }
353 }
354
355 // Firstly, deal with the formals. For each formal which is redundant,
356 // remove it, and also add a reroute from it to the constant value which
357 // it we know it to be.
358 for b in &need_editing {
359 let mut del_these = SmallVec::<[(Value, Value); 32]>::new();
360 let formals: &[Value] = func.dfg.block_params(*b);
361 for formal in formals {
362 // The state must give an absval for `formal`.
363 if let AbstractValue::One(replacement_val) = state.get(*formal) {
364 del_these.push((*formal, replacement_val));
365 }
366 }
367 // We can delete the formals in any order. However,
368 // `remove_block_param` works by sliding backwards all arguments to
369 // the right of the value it is asked to delete. Hence when removing more
370 // than one formal, it is significantly more efficient to ask it to
371 // remove the rightmost formal first, and hence this `rev()`.
372 for (redundant_formal, replacement_val) in del_these.into_iter().rev() {
373 func.dfg.remove_block_param(redundant_formal);
374 func.dfg.change_to_alias(redundant_formal, replacement_val);
375 }
376 }
377
378 // Secondly, visit all branch insns. If the destination has had its
379 // formals changed, change the actuals accordingly. Don't scan all insns,
380 // rather just visit those as listed in the summaries we prepared earlier.
381 let mut old_actuals = alloc::vec::Vec::new();
382 for summary in summaries.values() {
383 for edge in &summary.dests {
384 if !need_editing.contains(&edge.block) {
385 continue;
386 }
387
388 let dfg = &mut func.dfg;
389 let dests = dfg.insts[edge.inst].branch_destination_mut(&mut dfg.jump_tables);
390 let block = &mut dests[edge.branch_index as usize];
391
392 old_actuals.extend(block.args_slice(&dfg.value_lists));
393
394 // Check that the numbers of arguments make sense.
395 let formals = &summaries[edge.block].formals;
396 assert_eq!(formals.len(), old_actuals.len());
397
398 // Filter out redundant block arguments.
399 let mut formals = formals.iter();
400 old_actuals.retain(|_| {
401 let formal_i = formals.next().unwrap();
402 !state.get(*formal_i).is_one()
403 });
404
405 // Replace the block with a new one that only includes the non-redundant arguments.
406 // This leaks the value list from the old block,
407 // https://github.com/bytecodealliance/wasmtime/issues/5451 for more information.
408 let destination = block.block(&dfg.value_lists);
409 *block = BlockCall::new(destination, &old_actuals, &mut dfg.value_lists);
410 old_actuals.clear();
411 }
412 }
413
414 log::debug!(
415 "do_remove_constant_phis: done, {} iters. {} formals, of which {} const.",
416 iter_no,
417 state.absvals.len(),
418 n_consts
419 );
420}