// cranelift_codegen/egraph/elaborate.rs

//! Elaboration phase: lowers EGraph back to sequences of operations
//! in CFG nodes.
use super::cost::{pure_op_cost, Cost};
use super::domtree::DomTreeWithChildren;
use super::Stats;
use crate::dominator_tree::DominatorTree;
use crate::fx::FxHashSet;
use crate::ir::{Block, Function, Inst, Value, ValueDef};
use crate::loop_analysis::{Loop, LoopAnalysis, LoopLevel};
use crate::scoped_hash_map::ScopedHashMap;
use crate::trace;
use crate::unionfind::UnionFind;
use alloc::vec::Vec;
use core::cmp;
use cranelift_entity::{packed_option::ReservedValue, SecondaryMap};
use smallvec::{smallvec, SmallVec};
/// Walks the dominator tree and lowers the optimized egraph back into
/// concrete instructions placed in the function layout.
pub(crate) struct Elaborator<'a> {
    /// The function being elaborated; mutated in place.
    func: &'a mut Function,
    /// Dominator tree, used to validate loop hoist points.
    domtree: &'a DominatorTree,
    /// Dominator tree with explicit child links, used for the
    /// preorder block traversal.
    domtree_children: &'a DomTreeWithChildren,
    /// Loop-nest analysis, used for LICM decisions.
    loop_analysis: &'a LoopAnalysis,
    /// Union-find over values, defining eclass membership.
    eclasses: &'a mut UnionFind<Value>,
    /// Map from Value that is produced by a pure Inst (and was thus
    /// not in the side-effecting skeleton) to the value produced by
    /// an elaborated inst (placed in the layout) to whose results we
    /// refer in the final code.
    ///
    /// The first time we use some result of an instruction during
    /// elaboration, we can place it and insert an identity map (inst
    /// results to that same inst's results) in this scoped
    /// map. Within that block and its dom-tree children, that mapping
    /// is visible and we can continue to use it. This allows us to
    /// avoid cloning the instruction. However, if we pop that scope
    /// and use it somewhere else as well, we will need to
    /// duplicate. We detect this case by checking, when a value that
    /// we want is not present in this map, whether the producing inst
    /// is already placed in the Layout. If so, we duplicate, and
    /// insert non-identity mappings from the original inst's results
    /// to the cloned inst's results.
    value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
    /// Map from Value to the best (lowest-cost) Value in its eclass
    /// (tree of union value-nodes).
    value_to_best_value: SecondaryMap<Value, (Cost, Value)>,
    /// Stack of blocks and loops in current elaboration path.
    loop_stack: SmallVec<[LoopStackEntry; 8]>,
    /// The current block into which we are elaborating.
    cur_block: Block,
    /// Values that opt rules have indicated should be rematerialized
    /// in every block they are used (e.g., immediates or other
    /// "cheap-to-compute" ops).
    remat_values: &'a FxHashSet<Value>,
    /// Explicitly-unrolled value elaboration stack.
    elab_stack: Vec<ElabStackEntry>,
    /// Results from the elab stack.
    elab_result_stack: Vec<ElaboratedValue>,
    /// Explicitly-unrolled block elaboration stack.
    block_stack: Vec<BlockStackEntry>,
    /// Stats for various events during egraph processing, to help
    /// with optimization of this infrastructure.
    stats: &'a mut Stats,
}
63
/// A value that has been elaborated (placed into the layout),
/// together with the block in which its definition was placed.
#[derive(Clone, Copy, Debug)]
struct ElaboratedValue {
    /// Block containing the elaborated definition of `value`.
    in_block: Block,
    /// The concrete value (possibly the result of a cloned inst) to
    /// reference in final code.
    value: Value,
}
69
/// One level of the loop nest along the current elaboration path.
#[derive(Clone, Debug)]
struct LoopStackEntry {
    /// The loop identifier.
    lp: Loop,
    /// The hoist point: a block that immediately dominates this
    /// loop. May not be an immediate predecessor, but will be a valid
    /// point to place all loop-invariant ops: they must depend only
    /// on inputs that dominate the loop, so are available at (the end
    /// of) this block.
    hoist_block: Block,
    /// The depth in the scope map.
    scope_depth: u32,
}
83
/// One step of the explicitly-unrolled value-elaboration recursion.
#[derive(Clone, Debug)]
enum ElabStackEntry {
    /// Next action is to resolve this value into an elaborated inst
    /// (placed into the layout) that produces the value, and
    /// recursively elaborate the insts that produce its args.
    ///
    /// Any inserted ops should be inserted before `before`, which is
    /// the instruction demanding this value.
    Start { value: Value, before: Inst },
    /// Args have been pushed; waiting for results.
    PendingInst {
        /// The instruction whose args are being elaborated.
        inst: Inst,
        /// Index of the result of `inst` that was requested.
        result_idx: usize,
        /// Number of argument results to pop off the result stack.
        num_args: usize,
        /// Whether this value was flagged for rematerialization.
        remat: bool,
        /// Insertion point: new insts go just before this one.
        before: Inst,
    },
}
102
/// One step of the explicitly-unrolled domtree traversal.
#[derive(Clone, Debug)]
enum BlockStackEntry {
    /// Elaborate `block` (whose immediate dominator is `idom`), then
    /// push its domtree children.
    Elaborate { block: Block, idom: Option<Block> },
    /// Pop one scope level of the value-to-elaborated-value map.
    Pop,
}
108
109impl<'a> Elaborator<'a> {
110 pub(crate) fn new(
111 func: &'a mut Function,
112 domtree: &'a DominatorTree,
113 domtree_children: &'a DomTreeWithChildren,
114 loop_analysis: &'a LoopAnalysis,
115 remat_values: &'a FxHashSet<Value>,
116 eclasses: &'a mut UnionFind<Value>,
117 stats: &'a mut Stats,
118 ) -> Self {
119 let num_values = func.dfg.num_values();
120 let mut value_to_best_value =
121 SecondaryMap::with_default((Cost::infinity(), Value::reserved_value()));
122 value_to_best_value.resize(num_values);
123 Self {
124 func,
125 domtree,
126 domtree_children,
127 loop_analysis,
128 eclasses,
129 value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),
130 value_to_best_value,
131 loop_stack: smallvec![],
132 cur_block: Block::reserved_value(),
133 remat_values,
134 elab_stack: vec![],
135 elab_result_stack: vec![],
136 block_stack: vec![],
137 stats,
138 }
139 }
140
141 fn start_block(&mut self, idom: Option<Block>, block: Block) {
142 trace!(
143 "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",
144 block,
145 idom,
146 self.loop_stack.len(),
147 self.value_to_elaborated_value.depth()
148 );
149
150 // Pop any loop levels we're no longer in.
151 while let Some(inner_loop) = self.loop_stack.last() {
152 if self.loop_analysis.is_in_loop(block, inner_loop.lp) {
153 break;
154 }
155 self.loop_stack.pop();
156 }
157
158 // Note that if the *entry* block is a loop header, we will
159 // not make note of the loop here because it will not have an
160 // immediate dominator. We must disallow this case because we
161 // will skip adding the `LoopStackEntry` here but our
162 // `LoopAnalysis` will otherwise still make note of this loop
163 // and loop depths will not match.
164 if let Some(idom) = idom {
165 if let Some(lp) = self.loop_analysis.is_loop_header(block) {
166 self.loop_stack.push(LoopStackEntry {
167 lp,
168 // Any code hoisted out of this loop will have code
169 // placed in `idom`, and will have def mappings
170 // inserted in to the scoped hashmap at that block's
171 // level.
172 hoist_block: idom,
173 scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,
174 });
175 trace!(
176 " -> loop header, pushing; depth now {}",
177 self.loop_stack.len()
178 );
179 }
180 } else {
181 debug_assert!(
182 self.loop_analysis.is_loop_header(block).is_none(),
183 "Entry block (domtree root) cannot be a loop header!"
184 );
185 }
186
187 trace!("block {}: loop stack is {:?}", block, self.loop_stack);
188
189 self.cur_block = block;
190 }
191
192 fn compute_best_values(&mut self) {
193 let best = &mut self.value_to_best_value;
194 for (value, def) in self.func.dfg.values_and_defs() {
195 trace!("computing best for value {:?} def {:?}", value, def);
196 match def {
197 ValueDef::Union(x, y) => {
198 // Pick the best of the two options based on
199 // min-cost. This works because each element of `best`
200 // is a `(cost, value)` tuple; `cost` comes first so
201 // the natural comparison works based on cost, and
202 // breaks ties based on value number.
203 trace!(" -> best of {:?} and {:?}", best[x], best[y]);
204 best[value] = std::cmp::min(best[x], best[y]);
205 trace!(" -> {:?}", best[value]);
206 }
207 ValueDef::Param(_, _) => {
208 best[value] = (Cost::zero(), value);
209 }
210 // If the Inst is inserted into the layout (which is,
211 // at this point, only the side-effecting skeleton),
212 // then it must be computed and thus we give it zero
213 // cost.
214 ValueDef::Result(inst, _) if self.func.layout.inst_block(inst).is_some() => {
215 best[value] = (Cost::zero(), value);
216 }
217 ValueDef::Result(inst, _) => {
218 trace!(" -> value {}: result, computing cost", value);
219 let inst_data = &self.func.dfg.insts[inst];
220 let loop_level = self
221 .func
222 .layout
223 .inst_block(inst)
224 .map(|block| self.loop_analysis.loop_level(block))
225 .unwrap_or(LoopLevel::root());
226 // N.B.: at this point we know that the opcode is
227 // pure, so `pure_op_cost`'s precondition is
228 // satisfied.
229 let cost = self.func.dfg.inst_values(inst).fold(
230 pure_op_cost(inst_data.opcode()).at_level(loop_level.level()),
231 |cost, value| cost + best[value].0,
232 );
233 best[value] = (cost, value);
234 }
235 };
236 debug_assert_ne!(best[value].0, Cost::infinity());
237 debug_assert_ne!(best[value].1, Value::reserved_value());
238 trace!("best for eclass {:?}: {:?}", value, best[value]);
239 }
240 }
241
242 /// Elaborate use of an eclass, inserting any needed new
243 /// instructions before the given inst `before`. Should only be
244 /// given values corresponding to results of instructions or
245 /// blockparams.
246 fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {
247 debug_assert_ne!(value, Value::reserved_value());
248
249 // Kick off the process by requesting this result
250 // value.
251 self.elab_stack
252 .push(ElabStackEntry::Start { value, before });
253
254 // Now run the explicit-stack recursion until we reach
255 // the root.
256 self.process_elab_stack();
257 debug_assert_eq!(self.elab_result_stack.len(), 1);
258 self.elab_result_stack.pop().unwrap()
259 }
260
261 fn process_elab_stack(&mut self) {
262 while let Some(entry) = self.elab_stack.last() {
263 match entry {
264 &ElabStackEntry::Start { value, before } => {
265 // We always replace the Start entry, so pop it now.
266 self.elab_stack.pop();
267
268 debug_assert_ne!(value, Value::reserved_value());
269 let value = self.func.dfg.resolve_aliases(value);
270
271 self.stats.elaborate_visit_node += 1;
272 let canonical_value = self.eclasses.find_and_update(value);
273 debug_assert_ne!(canonical_value, Value::reserved_value());
274 trace!(
275 "elaborate: value {} canonical {} before {}",
276 value,
277 canonical_value,
278 before
279 );
280
281 // Get the best option; we use `value` (latest
282 // value) here so we have a full view of the
283 // eclass.
284 trace!("looking up best value for {}", value);
285 let (_, best_value) = self.value_to_best_value[value];
286 debug_assert_ne!(best_value, Value::reserved_value());
287 trace!("elaborate: value {} -> best {}", value, best_value);
288
289 let remat = if let Some(elab_val) =
290 self.value_to_elaborated_value.get(&canonical_value)
291 {
292 // Value is available. Look at the defined
293 // block, and determine whether this node kind
294 // allows rematerialization if the value comes
295 // from another block. If so, ignore the hit
296 // and recompute below.
297 let remat = elab_val.in_block != self.cur_block
298 && self.remat_values.contains(&best_value);
299 if !remat {
300 trace!("elaborate: value {} -> {:?}", value, elab_val);
301 self.stats.elaborate_memoize_hit += 1;
302 self.elab_result_stack.push(*elab_val);
303 continue;
304 }
305 trace!("elaborate: value {} -> remat", canonical_value);
306 self.stats.elaborate_memoize_miss_remat += 1;
307 // The op is pure at this point, so it is always valid to
308 // remove from this map.
309 self.value_to_elaborated_value.remove(&canonical_value);
310 true
311 } else {
312 // Value not available; but still look up
313 // whether it's been flagged for remat because
314 // this affects placement.
315 let remat = self.remat_values.contains(&best_value);
316 trace!(" -> not present in map; remat = {}", remat);
317 remat
318 };
319 self.stats.elaborate_memoize_miss += 1;
320
321 // Now resolve the value to its definition to see
322 // how we can compute it.
323 let (inst, result_idx) = match self.func.dfg.value_def(best_value) {
324 ValueDef::Result(inst, result_idx) => {
325 trace!(
326 " -> value {} is result {} of {}",
327 best_value,
328 result_idx,
329 inst
330 );
331 (inst, result_idx)
332 }
333 ValueDef::Param(in_block, _) => {
334 // We don't need to do anything to compute
335 // this value; just push its result on the
336 // result stack (blockparams are already
337 // available).
338 trace!(" -> value {} is a blockparam", best_value);
339 self.elab_result_stack.push(ElaboratedValue {
340 in_block,
341 value: best_value,
342 });
343 continue;
344 }
345 ValueDef::Union(_, _) => {
346 panic!("Should never have a Union value as the best value");
347 }
348 };
349
350 trace!(
351 " -> result {} of inst {:?}",
352 result_idx,
353 self.func.dfg.insts[inst]
354 );
355
356 // We're going to need to use this instruction
357 // result, placing the instruction into the
358 // layout. First, enqueue all args to be
359 // elaborated. Push state to receive the results
360 // and later elab this inst.
361 let num_args = self.func.dfg.inst_values(inst).count();
362 self.elab_stack.push(ElabStackEntry::PendingInst {
363 inst,
364 result_idx,
365 num_args,
366 remat,
367 before,
368 });
369
370 // Push args in reverse order so we process the
371 // first arg first.
372 for arg in self.func.dfg.inst_values(inst).rev() {
373 debug_assert_ne!(arg, Value::reserved_value());
374 self.elab_stack
375 .push(ElabStackEntry::Start { value: arg, before });
376 }
377 }
378
379 &ElabStackEntry::PendingInst {
380 inst,
381 result_idx,
382 num_args,
383 remat,
384 before,
385 } => {
386 self.elab_stack.pop();
387
388 trace!(
389 "PendingInst: {} result {} args {} remat {} before {}",
390 inst,
391 result_idx,
392 num_args,
393 remat,
394 before
395 );
396
397 // We should have all args resolved at this
398 // point. Grab them and drain them out, removing
399 // them.
400 let arg_idx = self.elab_result_stack.len() - num_args;
401 let arg_values = &self.elab_result_stack[arg_idx..];
402
403 // Compute max loop depth.
404 //
405 // Note that if there are no arguments then this instruction
406 // is allowed to get hoisted up one loop. This is not
407 // usually used since no-argument values are things like
408 // constants which are typically rematerialized, but for the
409 // `vconst` instruction 128-bit constants aren't as easily
410 // rematerialized. They're hoisted out of inner loops but
411 // not to the function entry which may run the risk of
412 // placing too much register pressure on the entire
413 // function. This is modeled with the `.saturating_sub(1)`
414 // as the default if there's otherwise no maximum.
415 let loop_hoist_level = arg_values
416 .iter()
417 .map(|&value| {
418 // Find the outermost loop level at which
419 // the value's defining block *is not* a
420 // member. This is the loop-nest level
421 // whose hoist-block we hoist to.
422 let hoist_level = self
423 .loop_stack
424 .iter()
425 .position(|loop_entry| {
426 !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)
427 })
428 .unwrap_or(self.loop_stack.len());
429 trace!(
430 " -> arg: elab_value {:?} hoist level {:?}",
431 value,
432 hoist_level
433 );
434 hoist_level
435 })
436 .max()
437 .unwrap_or(self.loop_stack.len().saturating_sub(1));
438 trace!(
439 " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
440 loop_hoist_level,
441 self.loop_stack.len(),
442 self.loop_stack,
443 );
444
445 // We know that this is a pure inst, because
446 // non-pure roots have already been placed in the
447 // value-to-elab'd-value map and are never subject
448 // to remat, so they will not reach this stage of
449 // processing.
450 //
451 // We now must determine the location at which we
452 // place the instruction. This is the current
453 // block *unless* we hoist above a loop when all
454 // args are loop-invariant (and this op is pure).
455 let (scope_depth, before, insert_block) =
456 if loop_hoist_level == self.loop_stack.len() || remat {
457 // Depends on some value at the current
458 // loop depth, or remat forces it here:
459 // place it at the current location.
460 (
461 self.value_to_elaborated_value.depth(),
462 before,
463 self.func.layout.inst_block(before).unwrap(),
464 )
465 } else {
466 // Does not depend on any args at current
467 // loop depth: hoist out of loop.
468 self.stats.elaborate_licm_hoist += 1;
469 let data = &self.loop_stack[loop_hoist_level];
470 // `data.hoist_block` should dominate `before`'s block.
471 let before_block = self.func.layout.inst_block(before).unwrap();
472 debug_assert!(self.domtree.dominates(
473 data.hoist_block,
474 before_block,
475 &self.func.layout
476 ));
477 // Determine the instruction at which we
478 // insert in `data.hoist_block`.
479 let before = self.func.layout.last_inst(data.hoist_block).unwrap();
480 (data.scope_depth as usize, before, data.hoist_block)
481 };
482
483 trace!(
484 " -> decided to place: before {} insert_block {}",
485 before,
486 insert_block
487 );
488
489 // Now we need to place `inst` at the computed
490 // location (just before `before`). Note that
491 // `inst` may already have been placed somewhere
492 // else, because a pure node may be elaborated at
493 // more than one place. In this case, we need to
494 // duplicate the instruction (and return the
495 // `Value`s for that duplicated instance
496 // instead).
497 trace!("need inst {} before {}", inst, before);
498 let inst = if self.func.layout.inst_block(inst).is_some() {
499 // Clone the inst!
500 let new_inst = self.func.dfg.clone_inst(inst);
501 trace!(
502 " -> inst {} already has a location; cloned to {}",
503 inst,
504 new_inst
505 );
506 // Create mappings in the
507 // value-to-elab'd-value map from original
508 // results to cloned results.
509 for (&result, &new_result) in self
510 .func
511 .dfg
512 .inst_results(inst)
513 .iter()
514 .zip(self.func.dfg.inst_results(new_inst).iter())
515 {
516 let elab_value = ElaboratedValue {
517 value: new_result,
518 in_block: insert_block,
519 };
520 let canonical_result = self.eclasses.find_and_update(result);
521 self.value_to_elaborated_value.insert_if_absent_with_depth(
522 canonical_result,
523 elab_value,
524 scope_depth,
525 );
526
527 self.eclasses.add(new_result);
528 self.eclasses.union(result, new_result);
529 self.value_to_best_value[new_result] = self.value_to_best_value[result];
530
531 trace!(
532 " -> cloned inst has new result {} for orig {}",
533 new_result,
534 result
535 );
536 }
537 new_inst
538 } else {
539 trace!(" -> no location; using original inst");
540 // Create identity mappings from result values
541 // to themselves in this scope, since we're
542 // using the original inst.
543 for &result in self.func.dfg.inst_results(inst) {
544 let elab_value = ElaboratedValue {
545 value: result,
546 in_block: insert_block,
547 };
548 let canonical_result = self.eclasses.find_and_update(result);
549 self.value_to_elaborated_value.insert_if_absent_with_depth(
550 canonical_result,
551 elab_value,
552 scope_depth,
553 );
554 trace!(" -> inserting identity mapping for {}", result);
555 }
556 inst
557 };
558 // Place the inst just before `before`.
559 self.func.layout.insert_inst(inst, before);
560
561 // Update the inst's arguments.
562 self.func
563 .dfg
564 .overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value));
565
566 // Now that we've consumed the arg values, pop
567 // them off the stack.
568 self.elab_result_stack.truncate(arg_idx);
569
570 // Push the requested result index of the
571 // instruction onto the elab-results stack.
572 self.elab_result_stack.push(ElaboratedValue {
573 in_block: insert_block,
574 value: self.func.dfg.inst_results(inst)[result_idx],
575 });
576 }
577 }
578 }
579 }
580
581 fn elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block) {
582 trace!("elaborate_block: block {}", block);
583 self.start_block(idom, block);
584
585 // Iterate over the side-effecting skeleton using the linked
586 // list in Layout. We will insert instructions that are
587 // elaborated *before* `inst`, so we can always use its
588 // next-link to continue the iteration.
589 let mut next_inst = self.func.layout.first_inst(block);
590 let mut first_branch = None;
591 while let Some(inst) = next_inst {
592 trace!(
593 "elaborating inst {} with results {:?}",
594 inst,
595 self.func.dfg.inst_results(inst)
596 );
597 // Record the first branch we see in the block; all
598 // elaboration for args of *any* branch must be inserted
599 // before the *first* branch, because the branch group
600 // must remain contiguous at the end of the block.
601 if self.func.dfg.insts[inst].opcode().is_branch() && first_branch == None {
602 first_branch = Some(inst);
603 }
604
605 // Determine where elaboration inserts insts.
606 let before = first_branch.unwrap_or(inst);
607 trace!(" -> inserting before {}", before);
608
609 elab_values.extend(self.func.dfg.inst_values(inst));
610 for arg in elab_values.iter_mut() {
611 trace!(" -> arg {}", *arg);
612 // Elaborate the arg, placing any newly-inserted insts
613 // before `before`. Get the updated value, which may
614 // be different than the original.
615 let new_arg = self.elaborate_eclass_use(*arg, before);
616 trace!(" -> rewrote arg to {:?}", new_arg);
617 *arg = new_arg.value;
618 }
619 self.func
620 .dfg
621 .overwrite_inst_values(inst, elab_values.drain(..));
622
623 // We need to put the results of this instruction in the
624 // map now.
625 for &result in self.func.dfg.inst_results(inst) {
626 trace!(" -> result {}", result);
627 let canonical_result = self.eclasses.find_and_update(result);
628 self.value_to_elaborated_value.insert_if_absent(
629 canonical_result,
630 ElaboratedValue {
631 in_block: block,
632 value: result,
633 },
634 );
635 }
636
637 next_inst = self.func.layout.next_inst(inst);
638 }
639 }
640
641 fn elaborate_domtree(&mut self, domtree: &DomTreeWithChildren) {
642 let root = domtree.root();
643 self.block_stack.push(BlockStackEntry::Elaborate {
644 block: root,
645 idom: None,
646 });
647
648 // A temporary workspace for elaborate_block, allocated here to maximize the use of the
649 // allocation.
650 let mut elab_values = Vec::new();
651
652 while let Some(top) = self.block_stack.pop() {
653 match top {
654 BlockStackEntry::Elaborate { block, idom } => {
655 self.block_stack.push(BlockStackEntry::Pop);
656 self.value_to_elaborated_value.increment_depth();
657
658 self.elaborate_block(&mut elab_values, idom, block);
659
660 // Push children. We are doing a preorder
661 // traversal so we do this after processing this
662 // block above.
663 let block_stack_end = self.block_stack.len();
664 for child in domtree.children(block) {
665 self.block_stack.push(BlockStackEntry::Elaborate {
666 block: child,
667 idom: Some(block),
668 });
669 }
670 // Reverse what we just pushed so we elaborate in
671 // original block order. (The domtree iter is a
672 // single-ended iter over a singly-linked list so
673 // we can't `.rev()` above.)
674 self.block_stack[block_stack_end..].reverse();
675 }
676 BlockStackEntry::Pop => {
677 self.value_to_elaborated_value.decrement_depth();
678 }
679 }
680 }
681 }
682
683 pub(crate) fn elaborate(&mut self) {
684 self.stats.elaborate_func += 1;
685 self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
686 self.compute_best_values();
687 self.elaborate_domtree(&self.domtree_children);
688 self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
689 }
690}