1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
//! Computation of basic block order in emitted code.
//!
//! This module handles the translation from CLIF BBs to VCode BBs.
//!
//! The basic idea is that we compute a sequence of "lowered blocks" that
//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
//! moves for phi-nodes on every block-to-block transfer; these blocks always
//! conceptually exist, but may be merged with an "original" CLIF block (and
//! hence not actually exist; this is equivalent to inserting the blocks only on
//! critical edges).
//!
//! In other words, starting from a CFG like this (where each "CLIF block" and
//! "(edge N->M)" is a separate basic block):
//!
//! ```plain
//!
//! CLIF block 0
//! / \
//! (edge 0->1) (edge 0->2)
//! | |
//! CLIF block 1 CLIF block 2
//! \ /
//! (edge 1->3) (edge 2->3)
//! \ /
//! CLIF block 3
//! ```
//!
//! We can produce a CFG of lowered blocks like so:
//!
//! ```plain
//! +--------------+
//! | CLIF block 0 |
//! +--------------+
//! / \
//! +--------------+ +--------------+
//! | (edge 0->1) | | (edge 0->2) |
//! | CLIF block 1 | | CLIF block 2 |
//! | (edge 1->3) | | (edge 2->3) |
//! +--------------+ +--------------+
//! \ /
//! \ /
//! +------------+
//! |CLIF block 3|
//! +------------+
//! ```
//!
//! Each `LoweredBlock` names just an original CLIF block, or just an edge block.
//!
//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
//! (never actually materialized, just defined by a "successors" function), and
//! compute the reverse postorder.
//!
//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
//! example, consider any information about whether edge blocks will actually
//! have content, because this computation happens as part of lowering *before*
//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
//! particular edge. But it works relatively well and is conceptually simple.
//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
//! branch editing that in practice elides empty blocks and simplifies some of
//! the other redundancies that this scheme produces.
use crate::dominator_tree::DominatorTree;
use crate::entity::SecondaryMap;
use crate::fx::{FxHashMap, FxHashSet};
use crate::inst_predicates::visit_block_succs;
use crate::ir::{Block, Function, Inst, Opcode};
use crate::{machinst::*, trace};
use smallvec::SmallVec;
/// Mapping from CLIF BBs to VCode BBs.
#[derive(Debug)]
pub struct BlockLoweringOrder {
/// Lowered blocks, in BlockIndex order. Each block is some combination of
/// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
/// see [LoweredBlock] for details.
lowered_order: Vec<LoweredBlock>,
/// BlockIndex values for successors for all lowered blocks, indexing `lowered_order`.
lowered_succ_indices: Vec<BlockIndex>,
/// Ranges in `lowered_succ_indices` giving the successor lists for each lowered
/// block. Indexed by lowering-order index (`BlockIndex`).
lowered_succ_ranges: Vec<(Option<Inst>, std::ops::Range<usize>)>,
/// Cold blocks. These blocks are not reordered in the
/// `lowered_order` above; the lowered order must respect RPO
/// (uses after defs) in order for lowering to be
/// correct. Instead, this set is used to provide `is_cold()`,
/// which is used by VCode emission to sink the blocks at the last
/// moment (when we actually emit bytes into the MachBuffer).
cold_blocks: FxHashSet<BlockIndex>,
/// Lowered blocks that are indirect branch targets.
indirect_branch_targets: FxHashSet<BlockIndex>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LoweredBlock {
/// Block in original CLIF.
Orig {
/// Original CLIF block.
block: Block,
},
/// Critical edge between two CLIF blocks.
CriticalEdge {
/// The predecessor block.
pred: Block,
/// The successor block.
succ: Block,
/// The index of this branch in the successor edges from `pred`, following the same
/// indexing order as `inst_predicates::visit_block_succs`. This is used to distinguish
/// multiple edges between the same CLIF blocks.
succ_idx: u32,
},
}
impl LoweredBlock {
/// Unwrap an `Orig` block.
pub fn orig_block(&self) -> Option<Block> {
match self {
&LoweredBlock::Orig { block } => Some(block),
&LoweredBlock::CriticalEdge { .. } => None,
}
}
/// The associated in-edge predecessor, if this is a critical edge.
#[cfg(test)]
pub fn in_edge(&self) -> Option<Block> {
match self {
&LoweredBlock::CriticalEdge { pred, .. } => Some(pred),
&LoweredBlock::Orig { .. } => None,
}
}
/// The associated out-edge successor, if this is a critical edge.
#[cfg(test)]
pub fn out_edge(&self) -> Option<Block> {
match self {
&LoweredBlock::CriticalEdge { succ, .. } => Some(succ),
&LoweredBlock::Orig { .. } => None,
}
}
}
impl BlockLoweringOrder {
/// Compute and return a lowered block order for `f`.
pub fn new(f: &Function, domtree: &DominatorTree) -> BlockLoweringOrder {
trace!("BlockLoweringOrder: function body {:?}", f);
// Step 1: compute the in-edge and out-edge count of every block.
let mut block_in_count = SecondaryMap::with_default(0);
let mut block_out_count = SecondaryMap::with_default(0);
// Block successors are stored as `LoweredBlocks` to simplify the construction of
// `lowered_succs` in the final result. Initially, all entries are `Orig` values, and are
// updated to be `CriticalEdge` when those cases are identified in step 2 below.
let mut block_succs: SmallVec<[LoweredBlock; 128]> = SmallVec::new();
let mut block_succ_range = SecondaryMap::with_default(0..0);
let mut indirect_branch_target_clif_blocks = FxHashSet::default();
for block in f.layout.blocks() {
let start = block_succs.len();
visit_block_succs(f, block, |_, succ, from_table| {
block_out_count[block] += 1;
block_in_count[succ] += 1;
block_succs.push(LoweredBlock::Orig { block: succ });
if from_table {
indirect_branch_target_clif_blocks.insert(succ);
}
});
// Ensure that blocks terminated by br_table instructions with an empty jump table are
// still treated like conditional blocks from the point of view of critical edge
// splitting.
if let Some(inst) = f.layout.last_inst(block) {
if Opcode::BrTable == f.dfg.insts[inst].opcode() {
block_out_count[block] = block_out_count[block].max(2);
}
}
let end = block_succs.len();
block_succ_range[block] = start..end;
}
// Step 2: walk the postorder from the domtree in reverse to produce our desired node
// lowering order, identifying critical edges to split along the way.
let mut lb_to_bindex = FxHashMap::default();
let mut lowered_order = Vec::new();
for &block in domtree.cfg_postorder().iter().rev() {
let lb = LoweredBlock::Orig { block };
let bindex = BlockIndex::new(lowered_order.len());
lb_to_bindex.insert(lb.clone(), bindex);
lowered_order.push(lb);
if block_out_count[block] > 1 {
let range = block_succ_range[block].clone();
for (succ_ix, lb) in block_succs[range].iter_mut().enumerate() {
let succ = lb.orig_block().unwrap();
if block_in_count[succ] > 1 {
// Mutate the successor to be a critical edge, as `block` has multiple
// edges leaving it, and `succ` has multiple edges entering it.
*lb = LoweredBlock::CriticalEdge {
pred: block,
succ,
succ_idx: succ_ix as u32,
};
let bindex = BlockIndex::new(lowered_order.len());
lb_to_bindex.insert(*lb, bindex);
lowered_order.push(*lb);
}
}
}
}
// Step 3: build the successor tables given the lowering order. We can't perform this step
// during the creation of `lowering_order`, as we need `lb_to_bindex` to be fully populated
// first.
let mut lowered_succ_indices = Vec::new();
let mut cold_blocks = FxHashSet::default();
let mut indirect_branch_targets = FxHashSet::default();
let lowered_succ_ranges =
Vec::from_iter(lowered_order.iter().enumerate().map(|(ix, lb)| {
let bindex = BlockIndex::new(ix);
let start = lowered_succ_indices.len();
let opt_inst = match lb {
// Block successors are pulled directly over, as they'll have been mutated when
// determining the block order already.
&LoweredBlock::Orig { block } => {
let range = block_succ_range[block].clone();
lowered_succ_indices
.extend(block_succs[range].iter().map(|lb| lb_to_bindex[lb]));
if f.layout.is_cold(block) {
cold_blocks.insert(bindex);
}
if indirect_branch_target_clif_blocks.contains(&block) {
indirect_branch_targets.insert(bindex);
}
let last = f.layout.last_inst(block).unwrap();
let opcode = f.dfg.insts[last].opcode();
assert!(opcode.is_terminator());
opcode.is_branch().then_some(last)
}
// Critical edges won't have successor information in block_succ_range, but
// they only have a single known successor to record anyway.
&LoweredBlock::CriticalEdge { succ, .. } => {
let succ_index = lb_to_bindex[&LoweredBlock::Orig { block: succ }];
lowered_succ_indices.push(succ_index);
// Edges inherit indirect branch and cold block metadata from their
// successor.
if f.layout.is_cold(succ) {
cold_blocks.insert(bindex);
}
if indirect_branch_target_clif_blocks.contains(&succ) {
indirect_branch_targets.insert(bindex);
}
None
}
};
let end = lowered_succ_indices.len();
(opt_inst, start..end)
}));
let result = BlockLoweringOrder {
lowered_order,
lowered_succ_indices,
lowered_succ_ranges,
cold_blocks,
indirect_branch_targets,
};
trace!("BlockLoweringOrder: {:#?}", result);
result
}
/// Get the lowered order of blocks.
pub fn lowered_order(&self) -> &[LoweredBlock] {
&self.lowered_order[..]
}
/// Get the successor indices for a lowered block.
pub fn succ_indices(&self, block: BlockIndex) -> (Option<Inst>, &[BlockIndex]) {
let (opt_inst, range) = &self.lowered_succ_ranges[block.index()];
(opt_inst.clone(), &self.lowered_succ_indices[range.clone()])
}
/// Determine whether the given lowered-block index is cold.
pub fn is_cold(&self, block: BlockIndex) -> bool {
self.cold_blocks.contains(&block)
}
/// Determine whether the given lowered block index is an indirect branch
/// target.
pub fn is_indirect_branch_target(&self, block: BlockIndex) -> bool {
self.indirect_branch_targets.contains(&block)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::types::*;
use crate::ir::UserFuncName;
use crate::ir::{AbiParam, Function, InstBuilder, Signature};
use crate::isa::CallConv;
fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> BlockLoweringOrder {
assert!(n_blocks > 0);
let name = UserFuncName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let blocks = (0..n_blocks)
.map(|i| {
let bb = func.dfg.make_block();
assert!(bb.as_u32() == i as u32);
bb
})
.collect::<Vec<_>>();
let arg0 = func.dfg.append_block_param(blocks[0], I32);
let mut pos = FuncCursor::new(&mut func);
let mut edge = 0;
for i in 0..n_blocks {
pos.insert_block(blocks[i]);
let mut succs = vec![];
while edge < edges.len() && edges[edge].0 == i {
succs.push(edges[edge].1);
edge += 1;
}
if succs.len() == 0 {
pos.ins().return_(&[arg0]);
} else if succs.len() == 1 {
pos.ins().jump(blocks[succs[0]], &[]);
} else if succs.len() == 2 {
pos.ins()
.brif(arg0, blocks[succs[0]], &[], blocks[succs[1]], &[]);
} else {
panic!("Too many successors");
}
}
let mut cfg = ControlFlowGraph::new();
cfg.compute(&func);
let dom_tree = DominatorTree::with_function(&func, &cfg);
BlockLoweringOrder::new(&func, &dom_tree)
}
#[test]
fn test_blockorder_diamond() {
let order = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
// This test case doesn't need to introduce any critical edges, as all regalloc allocations
// can sit on either the entry or exit of blocks 1 and 2.
assert_eq!(order.lowered_order.len(), 4);
}
#[test]
fn test_blockorder_critedge() {
// 0
// / \
// 1 2
// / \ \
// 3 4 |
// |\ _|____|
// | \/ |
// | /\ |
// 5 6
//
// (3 -> 5, and 3 -> 6 are critical edges and must be split)
//
let order = build_test_func(
7,
&[
(0, 1),
(0, 2),
(1, 3),
(1, 4),
(2, 5),
(3, 5),
(3, 6),
(4, 6),
],
);
assert_eq!(order.lowered_order.len(), 9);
println!("ordered = {:?}", order.lowered_order);
// block 0
assert_eq!(order.lowered_order[0].orig_block().unwrap().as_u32(), 0);
assert!(order.lowered_order[0].in_edge().is_none());
assert!(order.lowered_order[0].out_edge().is_none());
// block 2
assert_eq!(order.lowered_order[1].orig_block().unwrap().as_u32(), 2);
assert!(order.lowered_order[1].in_edge().is_none());
assert!(order.lowered_order[1].out_edge().is_none());
// block 1
assert_eq!(order.lowered_order[2].orig_block().unwrap().as_u32(), 1);
assert!(order.lowered_order[2].in_edge().is_none());
assert!(order.lowered_order[2].out_edge().is_none());
// block 4
assert_eq!(order.lowered_order[3].orig_block().unwrap().as_u32(), 4);
assert!(order.lowered_order[3].in_edge().is_none());
assert!(order.lowered_order[3].out_edge().is_none());
// block 3
assert_eq!(order.lowered_order[4].orig_block().unwrap().as_u32(), 3);
assert!(order.lowered_order[4].in_edge().is_none());
assert!(order.lowered_order[4].out_edge().is_none());
// critical edge 3 -> 5
assert!(order.lowered_order[5].orig_block().is_none());
assert_eq!(order.lowered_order[5].in_edge().unwrap().as_u32(), 3);
assert_eq!(order.lowered_order[5].out_edge().unwrap().as_u32(), 5);
// critical edge 3 -> 6
assert!(order.lowered_order[6].orig_block().is_none());
assert_eq!(order.lowered_order[6].in_edge().unwrap().as_u32(), 3);
assert_eq!(order.lowered_order[6].out_edge().unwrap().as_u32(), 6);
// block 6
assert_eq!(order.lowered_order[7].orig_block().unwrap().as_u32(), 6);
assert!(order.lowered_order[7].in_edge().is_none());
assert!(order.lowered_order[7].out_edge().is_none());
// block 5
assert_eq!(order.lowered_order[8].orig_block().unwrap().as_u32(), 5);
assert!(order.lowered_order[8].in_edge().is_none());
assert!(order.lowered_order[8].out_edge().is_none());
}
}