1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
//! A Constant-Phi-Node removal pass.
use crate::dominator_tree::DominatorTree;
use crate::fx::FxHashMap;
use crate::fx::FxHashSet;
use crate::ir;
use crate::ir::Function;
use crate::ir::{Block, BlockCall, Inst, Value};
use crate::timing;
use bumpalo::Bump;
use cranelift_entity::SecondaryMap;
use smallvec::SmallVec;
// A note on notation. For the sake of clarity, this file uses the phrase
// "formal parameters" to mean the `Value`s listed in the block head, and
// "actual parameters" to mean the `Value`s passed in a branch or a jump:
//
// block4(v16: i32, v18: i32): <-- formal parameters
// ...
// brif v27, block7(v22, v24), block6 <-- actual parameters
// This transformation pass (conceptually) partitions all values in the
// function into two groups:
//
// * Group A: values defined by block formal parameters, except for the entry block.
//
// * Group B: All other values: that is, values defined by instructions,
// and the formals of the entry block.
//
// For each value in Group A, it attempts to establish whether it will have
// the value of exactly one member of Group B. If so, the formal parameter is
// deleted, all corresponding actual parameters (in jumps/branches to the
// defining block) are deleted, and a rename is inserted.
//
// The entry block is special-cased because (1) we don't know what values flow
// to its formals and (2) in any case we can't change its formals.
//
// Work proceeds in three phases.
//
// * Phase 1: examine all instructions. For each block, make up a useful
// grab-bag of information, `BlockSummary`, that summarises the block's
// formals and jump/branch instruction. This is used by Phases 2 and 3.
//
// * Phase 2: for each value in Group A, try to find a single Group B value
// that flows to it. This is done using a classical iterative forward
// dataflow analysis over a simple constant-propagation style lattice. It
// converges quickly in practice -- I have seen at most 4 iterations. This
// is relatively cheap because the iteration is done over the
// `BlockSummary`s, and does not visit each instruction. The resulting
// fixed point is stored in a `SolverState`.
//
// * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to
// remove redundant formals and actuals, and to insert suitable renames.
//
// Note that the effectiveness of the analysis depends on on the fact that
// there are no copy instructions in Cranelift's IR. If there were, the
// computation of `actual_absval` in Phase 2 would have to be extended to
// chase through such copies.
//
// For large functions, the analysis cost using the new AArch64 backend is about
// 0.6% of the non-optimising compile time, as measured by instruction counts.
// This transformation usually pays for itself several times over, though, by
// reducing the isel/regalloc cost downstream. Gains of up to 7% have been
// seen for large functions.
/// The `Value`s (Group B) that can flow to a formal parameter (Group A).
#[derive(Clone, Copy, Debug, PartialEq)]
enum AbstractValue {
/// Two or more values flow to this formal.
Many,
/// Exactly one value, as stated, flows to this formal. The `Value`s that
/// can appear here are exactly: `Value`s defined by `Inst`s, plus the
/// `Value`s defined by the formals of the entry block. Note that this is
/// exactly the set of `Value`s that are *not* tracked in the solver below
/// (see `SolverState`).
One(Value /*Group B*/),
/// No value flows to this formal.
None,
}
impl AbstractValue {
fn join(self, other: AbstractValue) -> AbstractValue {
match (self, other) {
// Joining with `None` has no effect
(AbstractValue::None, p2) => p2,
(p1, AbstractValue::None) => p1,
// Joining with `Many` produces `Many`
(AbstractValue::Many, _p2) => AbstractValue::Many,
(_p1, AbstractValue::Many) => AbstractValue::Many,
// The only interesting case
(AbstractValue::One(v1), AbstractValue::One(v2)) => {
if v1 == v2 {
AbstractValue::One(v1)
} else {
AbstractValue::Many
}
}
}
}
fn is_one(self) -> bool {
matches!(self, AbstractValue::One(_))
}
}
#[derive(Clone, Copy, Debug)]
struct OutEdge<'a> {
/// An instruction that transfers control.
inst: Inst,
/// The index into branch_destinations for this instruction that corresponds
/// to this edge.
branch_index: u32,
/// The block that control is transferred to.
block: Block,
/// The arguments to that block.
///
/// These values can be from both groups A and B.
args: &'a [Value],
}
impl<'a> OutEdge<'a> {
/// Construct a new `OutEdge` for the given instruction.
///
/// Returns `None` if this is an edge without any block arguments, which
/// means we can ignore it for this analysis's purposes.
#[inline]
fn new(
bump: &'a Bump,
dfg: &ir::DataFlowGraph,
inst: Inst,
branch_index: usize,
block: BlockCall,
) -> Option<Self> {
let inst_var_args = block.args_slice(&dfg.value_lists);
// Skip edges without params.
if inst_var_args.is_empty() {
return None;
}
Some(OutEdge {
inst,
branch_index: branch_index as u32,
block: block.block(&dfg.value_lists),
args: bump.alloc_slice_fill_iter(
inst_var_args
.iter()
.map(|value| dfg.resolve_aliases(*value)),
),
})
}
}
/// For some block, a useful bundle of info. The `Block` itself is not stored
/// here since it will be the key in the associated `FxHashMap` -- see
/// `summaries` below. For the `SmallVec` tuning params: most blocks have
/// few parameters, hence `4`. And almost all blocks have either one or two
/// successors, hence `2`.
#[derive(Clone, Debug, Default)]
struct BlockSummary<'a> {
/// Formal parameters for this `Block`.
///
/// These values are from group A.
formals: &'a [Value],
/// Each outgoing edge from this block.
///
/// We don't bother to include transfers that pass zero parameters
/// since that makes more work for the solver for no purpose.
///
/// We optimize for the case where a branch instruction has up to two
/// outgoing edges, as unconditional jumps and conditional branches are
/// more prominent than br_table.
dests: SmallVec<[OutEdge<'a>; 2]>,
}
impl<'a> BlockSummary<'a> {
/// Construct a new `BlockSummary`, using `values` as its backing storage.
#[inline]
fn new(bump: &'a Bump, formals: &[Value]) -> Self {
Self {
formals: bump.alloc_slice_copy(formals),
dests: Default::default(),
}
}
}
/// Solver state. This holds a AbstractValue for each formal parameter, except
/// for those from the entry block.
struct SolverState {
absvals: FxHashMap<Value /*Group A*/, AbstractValue>,
}
impl SolverState {
fn new() -> Self {
Self {
absvals: FxHashMap::default(),
}
}
fn get(&self, actual: Value) -> AbstractValue {
*self
.absvals
.get(&actual)
.unwrap_or_else(|| panic!("SolverState::get: formal param {:?} is untracked?!", actual))
}
fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {
self.absvals.get(&actual)
}
fn set(&mut self, actual: Value, lp: AbstractValue) {
match self.absvals.insert(actual, lp) {
Some(_old_lp) => {}
None => panic!("SolverState::set: formal param {:?} is untracked?!", actual),
}
}
}
/// Detect phis in `func` that will only ever produce one value, using a
/// classic forward dataflow analysis. Then remove them.
#[inline(never)]
pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {
let _tt = timing::remove_constant_phis();
debug_assert!(domtree.is_valid());
// Phase 1 of 3: for each block, make a summary containing all relevant
// info. The solver will iterate over the summaries, rather than having
// to inspect each instruction in each block.
let bump =
Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::<Value>());
let mut summaries =
SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len());
for b in domtree.cfg_postorder().iter().rev().copied() {
let formals = func.dfg.block_params(b);
let mut summary = BlockSummary::new(&bump, formals);
for inst in func.layout.block_insts(b) {
for (ix, dest) in func.dfg.insts[inst]
.branch_destination(&func.dfg.jump_tables)
.iter()
.enumerate()
{
if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, ix, *dest) {
summary.dests.push(edge);
}
}
}
// Ensure the invariant that all blocks (except for the entry) appear
// in the summary, *unless* they have neither formals nor any
// param-carrying branches/jumps.
if formals.len() > 0 || summary.dests.len() > 0 {
summaries[b] = summary;
}
}
// Phase 2 of 3: iterate over the summaries in reverse postorder,
// computing new `AbstractValue`s for each tracked `Value`. The set of
// tracked `Value`s is exactly Group A as described above.
let entry_block = func
.layout
.entry_block()
.expect("remove_constant_phis: entry block unknown");
// Set up initial solver state
let mut state = SolverState::new();
for b in domtree.cfg_postorder().iter().rev().copied() {
// For each block, get the formals
if b == entry_block {
continue;
}
let formals = func.dfg.block_params(b);
for formal in formals {
let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);
assert!(mb_old_absval.is_none());
}
}
// Solve: repeatedly traverse the blocks in reverse postorder, until there
// are no changes.
let mut iter_no = 0;
loop {
iter_no += 1;
let mut changed = false;
for src in domtree.cfg_postorder().iter().rev().copied() {
let src_summary = &summaries[src];
for edge in &src_summary.dests {
assert!(edge.block != entry_block);
// By contrast, the dst block must have a summary. Phase 1
// will have only included an entry in `src_summary.dests` if
// that branch/jump carried at least one parameter. So the
// dst block does take parameters, so it must have a summary.
let dst_summary = &summaries[edge.block];
let dst_formals = &dst_summary.formals;
assert_eq!(edge.args.len(), dst_formals.len());
for (formal, actual) in dst_formals.iter().zip(edge.args) {
// Find the abstract value for `actual`. If it is a block
// formal parameter then the most recent abstract value is
// to be found in the solver state. If not, then it's a
// real value defining point (not a phi), in which case
// return it itself.
let actual_absval = match state.maybe_get(*actual) {
Some(pt) => *pt,
None => AbstractValue::One(*actual),
};
// And `join` the new value with the old.
let formal_absval_old = state.get(*formal);
let formal_absval_new = formal_absval_old.join(actual_absval);
if formal_absval_new != formal_absval_old {
changed = true;
state.set(*formal, formal_absval_new);
}
}
}
}
if !changed {
break;
}
}
let mut n_consts = 0;
for absval in state.absvals.values() {
if absval.is_one() {
n_consts += 1;
}
}
// Phase 3 of 3: edit the function to remove constant formals, using the
// summaries and the final solver state as a guide.
// Make up a set of blocks that need editing.
let mut need_editing = FxHashSet::<Block>::default();
for (block, summary) in summaries.iter() {
if block == entry_block {
continue;
}
for formal in summary.formals {
let formal_absval = state.get(*formal);
if formal_absval.is_one() {
need_editing.insert(block);
break;
}
}
}
// Firstly, deal with the formals. For each formal which is redundant,
// remove it, and also add a reroute from it to the constant value which
// it we know it to be.
for b in &need_editing {
let mut del_these = SmallVec::<[(Value, Value); 32]>::new();
let formals: &[Value] = func.dfg.block_params(*b);
for formal in formals {
// The state must give an absval for `formal`.
if let AbstractValue::One(replacement_val) = state.get(*formal) {
del_these.push((*formal, replacement_val));
}
}
// We can delete the formals in any order. However,
// `remove_block_param` works by sliding backwards all arguments to
// the right of the value it is asked to delete. Hence when removing more
// than one formal, it is significantly more efficient to ask it to
// remove the rightmost formal first, and hence this `rev()`.
for (redundant_formal, replacement_val) in del_these.into_iter().rev() {
func.dfg.remove_block_param(redundant_formal);
func.dfg.change_to_alias(redundant_formal, replacement_val);
}
}
// Secondly, visit all branch insns. If the destination has had its
// formals changed, change the actuals accordingly. Don't scan all insns,
// rather just visit those as listed in the summaries we prepared earlier.
let mut old_actuals = alloc::vec::Vec::new();
for summary in summaries.values() {
for edge in &summary.dests {
if !need_editing.contains(&edge.block) {
continue;
}
let dfg = &mut func.dfg;
let dests = dfg.insts[edge.inst].branch_destination_mut(&mut dfg.jump_tables);
let block = &mut dests[edge.branch_index as usize];
old_actuals.extend(block.args_slice(&dfg.value_lists));
// Check that the numbers of arguments make sense.
let formals = &summaries[edge.block].formals;
assert_eq!(formals.len(), old_actuals.len());
// Filter out redundant block arguments.
let mut formals = formals.iter();
old_actuals.retain(|_| {
let formal_i = formals.next().unwrap();
!state.get(*formal_i).is_one()
});
// Replace the block with a new one that only includes the non-redundant arguments.
// This leaks the value list from the old block,
// https://github.com/bytecodealliance/wasmtime/issues/5451 for more information.
let destination = block.block(&dfg.value_lists);
*block = BlockCall::new(destination, &old_actuals, &mut dfg.value_lists);
old_actuals.clear();
}
}
log::debug!(
"do_remove_constant_phis: done, {} iters. {} formals, of which {} const.",
iter_no,
state.absvals.len(),
n_consts
);
}