cranelift_wasm/
state.rs

1//! WebAssembly module and function translation state.
2//!
3//! The `FuncTranslationState` struct defined in this module is used to keep track of the WebAssembly
4//! value and control stacks during the translation of a single function.
5
6use crate::environ::{FuncEnvironment, GlobalVariable};
7use crate::{FuncIndex, GlobalIndex, Heap, MemoryIndex, TableIndex, TypeIndex, WasmResult};
8use crate::{HashMap, Occupied, Vacant};
9use cranelift_codegen::ir::{self, Block, Inst, Value};
10use std::vec::Vec;
11
12/// Information about the presence of an associated `else` for an `if`, or the
13/// lack thereof.
14#[derive(Debug)]
15pub enum ElseData {
16    /// The `if` does not already have an `else` block.
17    ///
18    /// This doesn't mean that it will never have an `else`, just that we
19    /// haven't seen it yet.
20    NoElse {
21        /// If we discover that we need an `else` block, this is the jump
22        /// instruction that needs to be fixed up to point to the new `else`
23        /// block rather than the destination block after the `if...end`.
24        branch_inst: Inst,
25
26        /// The placeholder block we're replacing.
27        placeholder: Block,
28    },
29
30    /// We have already allocated an `else` block.
31    ///
32    /// Usually we don't know whether we will hit an `if .. end` or an `if
33    /// .. else .. end`, but sometimes we can tell based on the block's type
34    /// signature that the signature is not valid if there isn't an `else`. In
35    /// these cases, we pre-allocate the `else` block.
36    WithElse {
37        /// This is the `else` block.
38        else_block: Block,
39    },
40}
41
42/// A control stack frame can be an `if`, a `block` or a `loop`, each one having the following
43/// fields:
44///
45/// - `destination`: reference to the `Block` that will hold the code after the control block;
46/// - `num_return_values`: number of values returned by the control block;
47/// - `original_stack_size`: size of the value stack at the beginning of the control block.
48///
49/// The `loop` frame has a `header` field that references the `Block` that contains the beginning
50/// of the body of the loop.
51#[derive(Debug)]
52pub enum ControlStackFrame {
53    If {
54        destination: Block,
55        else_data: ElseData,
56        num_param_values: usize,
57        num_return_values: usize,
58        original_stack_size: usize,
59        exit_is_branched_to: bool,
60        blocktype: wasmparser::BlockType,
61        /// Was the head of the `if` reachable?
62        head_is_reachable: bool,
63        /// What was the reachability at the end of the consequent?
64        ///
65        /// This is `None` until we're finished translating the consequent, and
66        /// is set to `Some` either by hitting an `else` when we will begin
67        /// translating the alternative, or by hitting an `end` in which case
68        /// there is no alternative.
69        consequent_ends_reachable: Option<bool>,
70        // Note: no need for `alternative_ends_reachable` because that is just
71        // `state.reachable` when we hit the `end` in the `if .. else .. end`.
72    },
73    Block {
74        destination: Block,
75        num_param_values: usize,
76        num_return_values: usize,
77        original_stack_size: usize,
78        exit_is_branched_to: bool,
79    },
80    Loop {
81        destination: Block,
82        header: Block,
83        num_param_values: usize,
84        num_return_values: usize,
85        original_stack_size: usize,
86    },
87}
88
89/// Helper methods for the control stack objects.
90impl ControlStackFrame {
91    pub fn num_return_values(&self) -> usize {
92        match *self {
93            Self::If {
94                num_return_values, ..
95            }
96            | Self::Block {
97                num_return_values, ..
98            }
99            | Self::Loop {
100                num_return_values, ..
101            } => num_return_values,
102        }
103    }
104    pub fn num_param_values(&self) -> usize {
105        match *self {
106            Self::If {
107                num_param_values, ..
108            }
109            | Self::Block {
110                num_param_values, ..
111            }
112            | Self::Loop {
113                num_param_values, ..
114            } => num_param_values,
115        }
116    }
117    pub fn following_code(&self) -> Block {
118        match *self {
119            Self::If { destination, .. }
120            | Self::Block { destination, .. }
121            | Self::Loop { destination, .. } => destination,
122        }
123    }
124    pub fn br_destination(&self) -> Block {
125        match *self {
126            Self::If { destination, .. } | Self::Block { destination, .. } => destination,
127            Self::Loop { header, .. } => header,
128        }
129    }
130    /// Private helper. Use `truncate_value_stack_to_else_params()` or
131    /// `truncate_value_stack_to_original_size()` to restore value-stack state.
132    fn original_stack_size(&self) -> usize {
133        match *self {
134            Self::If {
135                original_stack_size,
136                ..
137            }
138            | Self::Block {
139                original_stack_size,
140                ..
141            }
142            | Self::Loop {
143                original_stack_size,
144                ..
145            } => original_stack_size,
146        }
147    }
148    pub fn is_loop(&self) -> bool {
149        match *self {
150            Self::If { .. } | Self::Block { .. } => false,
151            Self::Loop { .. } => true,
152        }
153    }
154
155    pub fn exit_is_branched_to(&self) -> bool {
156        match *self {
157            Self::If {
158                exit_is_branched_to,
159                ..
160            }
161            | Self::Block {
162                exit_is_branched_to,
163                ..
164            } => exit_is_branched_to,
165            Self::Loop { .. } => false,
166        }
167    }
168
169    pub fn set_branched_to_exit(&mut self) {
170        match *self {
171            Self::If {
172                ref mut exit_is_branched_to,
173                ..
174            }
175            | Self::Block {
176                ref mut exit_is_branched_to,
177                ..
178            } => *exit_is_branched_to = true,
179            Self::Loop { .. } => {}
180        }
181    }
182
183    /// Pop values from the value stack so that it is left at the
184    /// input-parameters to an else-block.
185    pub fn truncate_value_stack_to_else_params(&self, stack: &mut Vec<Value>) {
186        debug_assert!(matches!(self, &ControlStackFrame::If { .. }));
187        stack.truncate(self.original_stack_size());
188    }
189
190    /// Pop values from the value stack so that it is left at the state it was
191    /// before this control-flow frame.
192    pub fn truncate_value_stack_to_original_size(&self, stack: &mut Vec<Value>) {
193        // The "If" frame pushes its parameters twice, so they're available to the else block
194        // (see also `FuncTranslationState::push_if`).
195        // Yet, the original_stack_size member accounts for them only once, so that the else
196        // block can see the same number of parameters as the consequent block. As a matter of
197        // fact, we need to substract an extra number of parameter values for if blocks.
198        let num_duplicated_params = match self {
199            &ControlStackFrame::If {
200                num_param_values, ..
201            } => {
202                debug_assert!(num_param_values <= self.original_stack_size());
203                num_param_values
204            }
205            _ => 0,
206        };
207        stack.truncate(self.original_stack_size() - num_duplicated_params);
208    }
209}
210
211/// Contains information passed along during a function's translation and that records:
212///
213/// - The current value and control stacks.
214/// - The depth of the two unreachable control blocks stacks, that are manipulated when translating
215///   unreachable code;
216pub struct FuncTranslationState {
217    /// A stack of values corresponding to the active values in the input wasm function at this
218    /// point.
219    pub(crate) stack: Vec<Value>,
220    /// A stack of active control flow operations at this point in the input wasm function.
221    pub(crate) control_stack: Vec<ControlStackFrame>,
222    /// Is the current translation state still reachable? This is false when translating operators
223    /// like End, Return, or Unreachable.
224    pub(crate) reachable: bool,
225
226    // Map of global variables that have already been created by `FuncEnvironment::make_global`.
227    globals: HashMap<GlobalIndex, GlobalVariable>,
228
229    // Map of heaps that have been created by `FuncEnvironment::make_heap`.
230    memory_to_heap: HashMap<MemoryIndex, Heap>,
231
232    // Map of tables that have been created by `FuncEnvironment::make_table`.
233    pub(crate) tables: HashMap<TableIndex, ir::Table>,
234
235    // Map of indirect call signatures that have been created by
236    // `FuncEnvironment::make_indirect_sig()`.
237    // Stores both the signature reference and the number of WebAssembly arguments
238    signatures: HashMap<TypeIndex, (ir::SigRef, usize)>,
239
240    // Imported and local functions that have been created by
241    // `FuncEnvironment::make_direct_func()`.
242    // Stores both the function reference and the number of WebAssembly arguments
243    functions: HashMap<FuncIndex, (ir::FuncRef, usize)>,
244}
245
246// Public methods that are exposed to non-`cranelift_wasm` API consumers.
247impl FuncTranslationState {
248    /// True if the current translation state expresses reachable code, false if it is unreachable.
249    #[inline]
250    pub fn reachable(&self) -> bool {
251        self.reachable
252    }
253}
254
255impl FuncTranslationState {
256    /// Construct a new, empty, `FuncTranslationState`
257    pub(crate) fn new() -> Self {
258        Self {
259            stack: Vec::new(),
260            control_stack: Vec::new(),
261            reachable: true,
262            globals: HashMap::new(),
263            memory_to_heap: HashMap::new(),
264            tables: HashMap::new(),
265            signatures: HashMap::new(),
266            functions: HashMap::new(),
267        }
268    }
269
270    fn clear(&mut self) {
271        debug_assert!(self.stack.is_empty());
272        debug_assert!(self.control_stack.is_empty());
273        self.reachable = true;
274        self.globals.clear();
275        self.memory_to_heap.clear();
276        self.tables.clear();
277        self.signatures.clear();
278        self.functions.clear();
279    }
280
281    /// Initialize the state for compiling a function with the given signature.
282    ///
283    /// This resets the state to containing only a single block representing the whole function.
284    /// The exit block is the last block in the function which will contain the return instruction.
285    pub(crate) fn initialize(&mut self, sig: &ir::Signature, exit_block: Block) {
286        self.clear();
287        self.push_block(
288            exit_block,
289            0,
290            sig.returns
291                .iter()
292                .filter(|arg| arg.purpose == ir::ArgumentPurpose::Normal)
293                .count(),
294        );
295    }
296
297    /// Push a value.
298    pub(crate) fn push1(&mut self, val: Value) {
299        self.stack.push(val);
300    }
301
302    /// Push multiple values.
303    pub(crate) fn pushn(&mut self, vals: &[Value]) {
304        self.stack.extend_from_slice(vals);
305    }
306
307    /// Pop one value.
308    pub(crate) fn pop1(&mut self) -> Value {
309        self.stack
310            .pop()
311            .expect("attempted to pop a value from an empty stack")
312    }
313
314    /// Peek at the top of the stack without popping it.
315    pub(crate) fn peek1(&self) -> Value {
316        *self
317            .stack
318            .last()
319            .expect("attempted to peek at a value on an empty stack")
320    }
321
322    /// Pop two values. Return them in the order they were pushed.
323    pub(crate) fn pop2(&mut self) -> (Value, Value) {
324        let v2 = self.stack.pop().unwrap();
325        let v1 = self.stack.pop().unwrap();
326        (v1, v2)
327    }
328
329    /// Pop three values. Return them in the order they were pushed.
330    pub(crate) fn pop3(&mut self) -> (Value, Value, Value) {
331        let v3 = self.stack.pop().unwrap();
332        let v2 = self.stack.pop().unwrap();
333        let v1 = self.stack.pop().unwrap();
334        (v1, v2, v3)
335    }
336
337    /// Helper to ensure the the stack size is at least as big as `n`; note that due to
338    /// `debug_assert` this will not execute in non-optimized builds.
339    #[inline]
340    fn ensure_length_is_at_least(&self, n: usize) {
341        debug_assert!(
342            n <= self.stack.len(),
343            "attempted to access {} values but stack only has {} values",
344            n,
345            self.stack.len()
346        )
347    }
348
349    /// Pop the top `n` values on the stack.
350    ///
351    /// The popped values are not returned. Use `peekn` to look at them before popping.
352    pub(crate) fn popn(&mut self, n: usize) {
353        self.ensure_length_is_at_least(n);
354        let new_len = self.stack.len() - n;
355        self.stack.truncate(new_len);
356    }
357
358    /// Peek at the top `n` values on the stack in the order they were pushed.
359    pub(crate) fn peekn(&self, n: usize) -> &[Value] {
360        self.ensure_length_is_at_least(n);
361        &self.stack[self.stack.len() - n..]
362    }
363
364    /// Peek at the top `n` values on the stack in the order they were pushed.
365    pub(crate) fn peekn_mut(&mut self, n: usize) -> &mut [Value] {
366        self.ensure_length_is_at_least(n);
367        let len = self.stack.len();
368        &mut self.stack[len - n..]
369    }
370
371    /// Push a block on the control stack.
372    pub(crate) fn push_block(
373        &mut self,
374        following_code: Block,
375        num_param_types: usize,
376        num_result_types: usize,
377    ) {
378        debug_assert!(num_param_types <= self.stack.len());
379        self.control_stack.push(ControlStackFrame::Block {
380            destination: following_code,
381            original_stack_size: self.stack.len() - num_param_types,
382            num_param_values: num_param_types,
383            num_return_values: num_result_types,
384            exit_is_branched_to: false,
385        });
386    }
387
388    /// Push a loop on the control stack.
389    pub(crate) fn push_loop(
390        &mut self,
391        header: Block,
392        following_code: Block,
393        num_param_types: usize,
394        num_result_types: usize,
395    ) {
396        debug_assert!(num_param_types <= self.stack.len());
397        self.control_stack.push(ControlStackFrame::Loop {
398            header,
399            destination: following_code,
400            original_stack_size: self.stack.len() - num_param_types,
401            num_param_values: num_param_types,
402            num_return_values: num_result_types,
403        });
404    }
405
406    /// Push an if on the control stack.
407    pub(crate) fn push_if(
408        &mut self,
409        destination: Block,
410        else_data: ElseData,
411        num_param_types: usize,
412        num_result_types: usize,
413        blocktype: wasmparser::BlockType,
414    ) {
415        debug_assert!(num_param_types <= self.stack.len());
416
417        // Push a second copy of our `if`'s parameters on the stack. This lets
418        // us avoid saving them on the side in the `ControlStackFrame` for our
419        // `else` block (if it exists), which would require a second heap
420        // allocation. See also the comment in `translate_operator` for
421        // `Operator::Else`.
422        self.stack.reserve(num_param_types);
423        for i in (self.stack.len() - num_param_types)..self.stack.len() {
424            let val = self.stack[i];
425            self.stack.push(val);
426        }
427
428        self.control_stack.push(ControlStackFrame::If {
429            destination,
430            else_data,
431            original_stack_size: self.stack.len() - num_param_types,
432            num_param_values: num_param_types,
433            num_return_values: num_result_types,
434            exit_is_branched_to: false,
435            head_is_reachable: self.reachable,
436            consequent_ends_reachable: None,
437            blocktype,
438        });
439    }
440}
441
442/// Methods for handling entity references.
443impl FuncTranslationState {
444    /// Get the `GlobalVariable` reference that should be used to access the global variable
445    /// `index`. Create the reference if necessary.
446    /// Also return the WebAssembly type of the global.
447    pub(crate) fn get_global<FE: FuncEnvironment + ?Sized>(
448        &mut self,
449        func: &mut ir::Function,
450        index: u32,
451        environ: &mut FE,
452    ) -> WasmResult<GlobalVariable> {
453        let index = GlobalIndex::from_u32(index);
454        match self.globals.entry(index) {
455            Occupied(entry) => Ok(*entry.get()),
456            Vacant(entry) => Ok(*entry.insert(environ.make_global(func, index)?)),
457        }
458    }
459
460    /// Get the `Heap` reference that should be used to access linear memory `index`.
461    /// Create the reference if necessary.
462    pub(crate) fn get_heap<FE: FuncEnvironment + ?Sized>(
463        &mut self,
464        func: &mut ir::Function,
465        index: u32,
466        environ: &mut FE,
467    ) -> WasmResult<Heap> {
468        let index = MemoryIndex::from_u32(index);
469        match self.memory_to_heap.entry(index) {
470            Occupied(entry) => Ok(*entry.get()),
471            Vacant(entry) => Ok(*entry.insert(environ.make_heap(func, index)?)),
472        }
473    }
474
475    /// Get the `Table` reference that should be used to access table `index`.
476    /// Create the reference if necessary.
477    pub(crate) fn get_or_create_table<FE: FuncEnvironment + ?Sized>(
478        &mut self,
479        func: &mut ir::Function,
480        index: u32,
481        environ: &mut FE,
482    ) -> WasmResult<ir::Table> {
483        let index = TableIndex::from_u32(index);
484        match self.tables.entry(index) {
485            Occupied(entry) => Ok(*entry.get()),
486            Vacant(entry) => Ok(*entry.insert(environ.make_table(func, index)?)),
487        }
488    }
489
490    /// Get the `SigRef` reference that should be used to make an indirect call with signature
491    /// `index`. Also return the number of WebAssembly arguments in the signature.
492    ///
493    /// Create the signature if necessary.
494    pub(crate) fn get_indirect_sig<FE: FuncEnvironment + ?Sized>(
495        &mut self,
496        func: &mut ir::Function,
497        index: u32,
498        environ: &mut FE,
499    ) -> WasmResult<(ir::SigRef, usize)> {
500        let index = TypeIndex::from_u32(index);
501        match self.signatures.entry(index) {
502            Occupied(entry) => Ok(*entry.get()),
503            Vacant(entry) => {
504                let sig = environ.make_indirect_sig(func, index)?;
505                Ok(*entry.insert((sig, num_wasm_parameters(environ, &func.dfg.signatures[sig]))))
506            }
507        }
508    }
509
510    /// Get the `FuncRef` reference that should be used to make a direct call to function
511    /// `index`. Also return the number of WebAssembly arguments in the signature.
512    ///
513    /// Create the function reference if necessary.
514    pub(crate) fn get_direct_func<FE: FuncEnvironment + ?Sized>(
515        &mut self,
516        func: &mut ir::Function,
517        index: u32,
518        environ: &mut FE,
519    ) -> WasmResult<(ir::FuncRef, usize)> {
520        let index = FuncIndex::from_u32(index);
521        match self.functions.entry(index) {
522            Occupied(entry) => Ok(*entry.get()),
523            Vacant(entry) => {
524                let fref = environ.make_direct_func(func, index)?;
525                let sig = func.dfg.ext_funcs[fref].signature;
526                Ok(*entry.insert((
527                    fref,
528                    num_wasm_parameters(environ, &func.dfg.signatures[sig]),
529                )))
530            }
531        }
532    }
533}
534
535fn num_wasm_parameters<FE: FuncEnvironment + ?Sized>(
536    environ: &FE,
537    signature: &ir::Signature,
538) -> usize {
539    (0..signature.params.len())
540        .filter(|index| environ.is_wasm_parameter(signature, *index))
541        .count()
542}