// cranelift_codegen/timing.rs

//! Pass timing.
//!
//! This module provides facilities for timing the execution of individual compilation passes.

use core::fmt;
use std::any::Any;
use std::boxed::Box;
use std::cell::{Cell, RefCell};
use std::mem;
use std::time::{Duration, Instant};

// Each pass that can be timed is predefined with the `define_passes!` macro. Each pass has a
// snake_case name and a plain text description used when printing out the timing report.
//
// This macro defines:
//
// - A C-style enum containing all the pass names and a trailing `None` variant.
// - A usize constant with the number of defined passes.
// - A const array of pass descriptions, in the same order as the enum variants.
// - A public function per pass used to start the timing of that pass.
//
// Entries are written as `name: "description"` and separated by commas; a trailing comma is
// accepted but not required.
macro_rules! define_passes {
    ($($pass:ident: $desc:expr),+ $(,)?) => {
        /// A single profiled pass.
        // Variants deliberately reuse the snake_case pass names so that they match the
        // generated starter functions below.
        #[allow(non_camel_case_types)]
        #[derive(Clone, Copy, Debug, PartialEq, Eq)]
        pub enum Pass {
            $(#[doc=$desc] $pass,)+
            /// No active pass.
            None,
        }

        /// The number of profiled passes.
        // `None` is declared last, so its discriminant equals the number of real passes.
        pub const NUM_PASSES: usize = Pass::None as usize;

        const DESCRIPTIONS: [&str; NUM_PASSES] = [ $($desc),+ ];

        $(
            #[doc=$desc]
            #[must_use]
            pub fn $pass() -> Box<dyn Any> {
                start_pass(Pass::$pass)
            }
        )+
    }
}

47// Pass definitions.
48define_passes! {
49    // All these are used in other crates but defined here so they appear in the unified
50    // `PassTimes` output.
51    process_file: "Processing test file",
52    parse_text: "Parsing textual Cranelift IR",
53    wasm_translate_module: "Translate WASM module",
54    wasm_translate_function: "Translate WASM function",
55
56    verifier: "Verify Cranelift IR",
57
58    compile: "Compilation passes",
59    try_incremental_cache: "Try loading from incremental cache",
60    store_incremental_cache: "Store in incremental cache",
61    flowgraph: "Control flow graph",
62    domtree: "Dominator tree",
63    loop_analysis: "Loop analysis",
64    preopt: "Pre-legalization rewriting",
65    dce: "Dead code elimination",
66    egraph: "Egraph based optimizations",
67    gvn: "Global value numbering",
68    licm: "Loop invariant code motion",
69    unreachable_code: "Remove unreachable blocks",
70    remove_constant_phis: "Remove constant phi-nodes",
71
72    vcode_lower: "VCode lowering",
73    vcode_emit: "VCode emission",
74    vcode_emit_finish: "VCode emission finalization",
75
76    regalloc: "Register allocation",
77    regalloc_checker: "Register allocation symbolic verification",
78    layout_renumber: "Layout full renumbering",
79
80    canonicalize_nans: "Canonicalization of NaNs",
81}
82
83impl Pass {
84    fn idx(self) -> usize {
85        self as usize
86    }
87
88    /// Description of the pass.
89    pub fn description(self) -> &'static str {
90        match DESCRIPTIONS.get(self.idx()) {
91            Some(s) => s,
92            None => "<no pass>",
93        }
94    }
95}
96
97impl fmt::Display for Pass {
98    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
99        f.write_str(self.description())
100    }
101}
102
103/// A profiler.
104pub trait Profiler {
105    /// Start a profiling pass.
106    ///
107    /// Will return a token which when dropped indicates the end of the pass.
108    ///
109    /// Multiple passes can be active at the same time, but they must be started and stopped in a
110    /// LIFO fashion.
111    fn start_pass(&self, pass: Pass) -> Box<dyn Any>;
112}
113
114// Information about passes in a single thread.
115thread_local! {
116    static PROFILER: RefCell<Box<dyn Profiler>> = RefCell::new(Box::new(DefaultProfiler));
117}
118
119/// Set the profiler for the current thread.
120///
121/// Returns the old profiler.
122pub fn set_thread_profiler(new_profiler: Box<dyn Profiler>) -> Box<dyn Profiler> {
123    PROFILER.with(|profiler| std::mem::replace(&mut *profiler.borrow_mut(), new_profiler))
124}
125
126/// Start timing `pass` as a child of the currently running pass, if any.
127///
128/// This function is called by the publicly exposed pass functions.
129fn start_pass(pass: Pass) -> Box<dyn Any> {
130    PROFILER.with(|profiler| profiler.borrow().start_pass(pass))
131}
132
133/// A timing token is responsible for timing the currently running pass. Timing starts when it
134/// is created and ends when it is dropped.
135///
136/// Multiple passes can be active at the same time, but they must be started and stopped in a
137/// LIFO fashion.
138struct DefaultTimingToken {
139    /// Start time for this pass.
140    start: Instant,
141
142    // Pass being timed by this token.
143    pass: Pass,
144
145    // The previously active pass which will be restored when this token is dropped.
146    prev: Pass,
147}
148
/// Accumulated timing information for a single pass.
#[derive(Debug, Default, Copy, Clone)]
struct PassTime {
    /// Total time spent running this pass including children.
    total: Duration,

    /// Time spent running in child passes.
    child: Duration,
}

159/// Accumulated timing for all passes.
160pub struct PassTimes {
161    pass: [PassTime; NUM_PASSES],
162}
163
164impl PassTimes {
165    /// Add `other` to the timings of this `PassTimes`.
166    pub fn add(&mut self, other: &Self) {
167        for (a, b) in self.pass.iter_mut().zip(&other.pass[..]) {
168            a.total += b.total;
169            a.child += b.child;
170        }
171    }
172
173    /// Returns the total amount of time taken by all the passes measured.
174    pub fn total(&self) -> Duration {
175        self.pass.iter().map(|p| p.total - p.child).sum()
176    }
177}
178
179impl Default for PassTimes {
180    fn default() -> Self {
181        Self {
182            pass: [Default::default(); NUM_PASSES],
183        }
184    }
185}
186
187impl fmt::Display for PassTimes {
188    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
189        writeln!(f, "======== ========  ==================================")?;
190        writeln!(f, "   Total     Self  Pass")?;
191        writeln!(f, "-------- --------  ----------------------------------")?;
192        for (time, desc) in self.pass.iter().zip(&DESCRIPTIONS[..]) {
193            // Omit passes that haven't run.
194            if time.total == Duration::default() {
195                continue;
196            }
197
198            // Write a duration as secs.millis, trailing space.
199            fn fmtdur(mut dur: Duration, f: &mut fmt::Formatter) -> fmt::Result {
200                // Round to nearest ms by adding 500us.
201                dur += Duration::new(0, 500_000);
202                let ms = dur.subsec_millis();
203                write!(f, "{:4}.{:03} ", dur.as_secs(), ms)
204            }
205
206            fmtdur(time.total, f)?;
207            if let Some(s) = time.total.checked_sub(time.child) {
208                fmtdur(s, f)?;
209            }
210            writeln!(f, " {}", desc)?;
211        }
212        writeln!(f, "======== ========  ==================================")
213    }
214}
215
216// Information about passes in a single thread.
217thread_local! {
218    static CURRENT_PASS: Cell<Pass> = const { Cell::new(Pass::None) };
219    static PASS_TIME: RefCell<PassTimes> = RefCell::new(Default::default());
220}
221
/// The default profiler. You can get the results using [`take_current`].
#[derive(Debug, Clone, Copy, Default)]
pub struct DefaultProfiler;

225impl Profiler for DefaultProfiler {
226    fn start_pass(&self, pass: Pass) -> Box<dyn Any> {
227        let prev = CURRENT_PASS.with(|p| p.replace(pass));
228        log::debug!("timing: Starting {}, (during {})", pass, prev);
229        Box::new(DefaultTimingToken {
230            start: Instant::now(),
231            pass,
232            prev,
233        })
234    }
235}
236
237/// Dropping a timing token indicated the end of the pass.
238impl Drop for DefaultTimingToken {
239    fn drop(&mut self) {
240        let duration = self.start.elapsed();
241        log::debug!("timing: Ending {}", self.pass);
242        let old_cur = CURRENT_PASS.with(|p| p.replace(self.prev));
243        debug_assert_eq!(self.pass, old_cur, "Timing tokens dropped out of order");
244        PASS_TIME.with(|rc| {
245            let mut table = rc.borrow_mut();
246            table.pass[self.pass.idx()].total += duration;
247            if let Some(parent) = table.pass.get_mut(self.prev.idx()) {
248                parent.child += duration;
249            }
250        })
251    }
252}
253
254/// Take the current accumulated pass timings and reset the timings for the current thread.
255///
256/// Only applies when [`DefaultProfiler`] is used.
257pub fn take_current() -> PassTimes {
258    PASS_TIME.with(|rc| mem::take(&mut *rc.borrow_mut()))
259}
260
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::string::ToString;

    /// `Display` for `Pass` prints the pass description, with a placeholder for `Pass::None`.
    #[test]
    fn display() {
        assert_eq!(Pass::None.to_string(), "<no pass>");
        assert_eq!(Pass::regalloc.to_string(), "Register allocation");
    }
}