cranelift_codegen/machinst/vcode.rs

//! This implements the VCode container: a CFG of Insts that have been lowered.
//!
//! VCode is virtual-register code. An instruction in VCode is almost a machine
//! instruction; however, its register slots can refer to virtual registers in
//! addition to real machine registers.
//!
//! VCode is structured with traditional basic blocks, and
//! each block must be terminated by an unconditional branch (one target), a
//! conditional branch (two targets), or a return (no targets). Note that this
//! slightly differs from the machine code of most ISAs: in most ISAs, a
//! conditional branch has one target (and the not-taken case falls through).
//! However, we expect that machine backends will elide branches to the following
//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
//! play with layout prior to final binary emission, as well, if we want.
//!
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.
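//!
//! As a purely illustrative example of this convention (hypothetical
//! instruction names, not actual backend output): a block ending in a
//! two-target conditional branch such as `cond_br v0, block2, block3` may be
//! emitted as a single conditional branch when `block3` immediately follows
//! it in the final layout, or as a branch-cond / branch-uncond pair when
//! neither target is the fallthrough block.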

use crate::fx::FxHashMap;
use crate::fx::FxHashSet;
use crate::ir::RelSourceLoc;
use crate::ir::{self, types, Constant, ConstantData, DynamicStackSlot, LabelValueLoc, ValueLabel};
use crate::machinst::*;
use crate::timing;
use crate::trace;
use crate::CodegenError;
use crate::ValueLocRange;
use regalloc2::{
    Edit, Function as RegallocFunction, InstOrEdit, InstRange, Operand, OperandKind, PRegSet,
    RegClass, VReg,
};

use alloc::vec::Vec;
use cranelift_entity::{entity_impl, Keys, PrimaryMap};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt;

/// Index referring to an instruction in VCode.
pub type InsnIndex = regalloc2::Inst;

/// Index referring to a basic block in VCode.
pub type BlockIndex = regalloc2::Block;

/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
pub trait VCodeInst: MachInst + MachInstEmit {}
impl<I: MachInst + MachInstEmit> VCodeInst for I {}

/// A function in "VCode" (virtualized-register code) form, after
/// lowering.  This is essentially a standard CFG of basic blocks,
/// where each basic block consists of lowered instructions produced
/// by the machine-specific backend.
///
/// Note that the VCode is immutable once produced, and is not
/// modified by register allocation in particular. Rather, register
/// allocation on the `VCode` produces a separate `regalloc2::Output`
/// struct, and this can be passed to `emit`. `emit` in turn does not
/// modify the vcode, but produces an `EmitResult`, which contains the
/// machine code itself, and the associated disassembly and/or
/// metadata as requested.
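///
/// A rough sketch of how a `VCode` flows through the backend (identifier
/// names and the exact register-allocator invocation are illustrative only;
/// the real driver lives outside this module):
///
/// ```ignore
/// let vcode: VCode<I> = builder.build(allocatable_regs, vregs);
/// let regalloc_output = regalloc2::run(&vcode, &machine_env, &options)?;
/// let result: EmitResult<I> = vcode.emit(&regalloc_output, want_disasm, want_metadata);
/// let machine_code = result.buffer;
/// ```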
pub struct VCode<I: VCodeInst> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// Lowered machine instructions in order corresponding to the original IR.
    insts: Vec<I>,

    /// Operands: pre-regalloc references to virtual registers with
    /// constraints, in one flattened array. This allows the regalloc
    /// to efficiently access all operands without requiring expensive
    /// matches or method invocations on insts.
    operands: Vec<Operand>,

    /// Operand index ranges: for each instruction in `insts`, there
    /// is a tuple here providing the range in `operands` for that
    /// instruction's operands.
    operand_ranges: Vec<(u32, u32)>,

    /// Clobbers: a sparse map from instruction indices to clobber masks.
    clobbers: FxHashMap<InsnIndex, PRegSet>,

    /// Move information: for a given InsnIndex, (src, dst) operand pair.
    is_move: FxHashMap<InsnIndex, (Operand, Operand)>,

    /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
    /// reasonable to keep one of these per instruction.)
    srclocs: Vec<RelSourceLoc>,

    /// Entry block.
    entry: BlockIndex,

    /// Block instruction ranges: a (start, end) pair of instruction
    /// indices for each block.
    block_ranges: Vec<(InsnIndex, InsnIndex)>,

    /// Block successors: index range in the `block_succs_preds` list.
    block_succ_range: Vec<(u32, u32)>,

    /// Block predecessors: index range in the `block_succs_preds` list.
    block_pred_range: Vec<(u32, u32)>,

    /// Block successor and predecessor lists, concatenated into one
    /// Vec. The `block_succ_range` and `block_pred_range` lists of
    /// tuples above give (start, end) ranges within this list that
    /// correspond to each basic block's successors or predecessors,
    /// respectively.
    block_succs_preds: Vec<regalloc2::Block>,

    /// Block parameters: index range in `block_params` below.
    block_params_range: Vec<(u32, u32)>,

    /// Block parameter lists, concatenated into one vec. The
    /// `block_params_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// blockparam vregs.
    block_params: Vec<regalloc2::VReg>,

    /// Outgoing block arguments on branch instructions, concatenated
    /// into one list.
    ///
    /// Note that this is conceptually a 3D array: we have a VReg list
    /// per block, per successor. We flatten those three dimensions
    /// into this 1D vec, then store index ranges in two levels of
    /// indirection.
    ///
    /// Indexed by the (start, end) ranges in `branch_block_arg_range`.
    branch_block_args: Vec<regalloc2::VReg>,

    /// Array of sequences of (start, end) tuples in
    /// `branch_block_args`, one for each successor; these sequences
    /// for each block are concatenated.
    ///
    /// Indexed by the indices in `branch_block_arg_succ_range`.
    branch_block_arg_range: Vec<(u32, u32)>,

    /// For a given block, indices in `branch_block_arg_range`
    /// corresponding to all of its successors.
    branch_block_arg_succ_range: Vec<(u32, u32)>,

    /// VReg aliases. Each key in this table is translated to its
    /// value when gathering Operands from instructions. Aliases are
    /// not chased transitively (we do not further look up the
    /// translated reg to see if it is another alias).
    ///
    /// We use these aliases to rename an instruction's expected
    /// result vregs to the returned vregs from lowering, which are
    /// usually freshly-allocated temps.
    ///
    /// Operands and branch arguments will already have been
    /// translated through this alias table by the time regalloc runs;
    /// we keep the table because it helps to make sense of
    /// instructions when pretty-printed, for example.
    vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,

    /// Block-order information.
    block_order: BlockLoweringOrder,

    /// ABI object.
    pub(crate) abi: Callee<I::ABIMachineSpec>,

    /// Constant information used during code emission. This should be
    /// immutable across function compilations within the same module.
    emit_info: I::Info,

    /// Reference-typed `regalloc2::VReg`s. The regalloc requires
    /// these in a dense slice (as opposed to querying the
    /// reftype-status of each vreg) for efficient iteration.
    reftyped_vregs: Vec<VReg>,

    /// Constants.
    constants: VCodeConstants,

    /// Value labels for debuginfo attached to vregs.
    debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,

    pub(crate) sigs: SigSet,
}

/// The result of `VCode::emit`. Contains all information computed
/// during emission: actual machine code, optionally a disassembly,
/// and optionally metadata about the code layout.
pub struct EmitResult<I: VCodeInst> {
    /// The MachBuffer containing the machine code.
    pub buffer: MachBuffer<I>,

    /// Offset of each basic block, recorded during emission. Computed
    /// only if `debug_value_labels` is non-empty.
    pub bb_offsets: Vec<CodeOffset>,

    /// Final basic-block edges, in terms of code offsets of
    /// bb-starts. Computed only if `debug_value_labels` is non-empty.
    pub bb_edges: Vec<(CodeOffset, CodeOffset)>,

    /// Final instruction offsets, recorded during emission. Computed
    /// only if `debug_value_labels` is non-empty.
    pub inst_offsets: Vec<CodeOffset>,

    /// Final length of function body.
    pub func_body_len: CodeOffset,

    /// The pretty-printed disassembly, if any. This uses the same
    /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
    /// implementation, but additionally includes the prologue and
    /// epilogue(s), and makes use of the regalloc results.
    pub disasm: Option<String>,

    /// Offsets of sized stackslots.
    pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,

    /// Offsets of dynamic stackslots.
    pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,

    /// Value-labels information (debug metadata).
    pub value_labels_ranges: ValueLabelsRanges,

    /// Stack frame size.
    pub frame_size: u32,

    /// The alignment requirement for pc-relative loads.
    pub alignment: u32,
}

/// A builder for a VCode function body.
///
/// This builder has the ability to accept instructions in either
/// forward or reverse order, depending on the pass direction that
/// produces the VCode. The lowering from CLIF to VCode<MachInst>
/// ordinarily occurs in reverse order (in order to allow instructions
/// to be lowered only if used, and not merged) so a reversal will
/// occur at the end of lowering to ensure the VCode is in machine
/// order.
///
/// If built in reverse, block and instruction indices used once the
/// VCode is built are relative to the final (reversed) order, not the
/// order of construction. Note that this means we do not know the
/// final block or instruction indices when building, so we do not
/// hand them out. (The user is assumed to know them when appending
/// terminator instructions with successor blocks.)
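///
/// A rough usage sketch (assuming a backward build; variable names and the
/// exact per-block ordering of calls are illustrative only):
///
/// ```ignore
/// let mut builder = VCodeBuilder::new(
///     sigs, abi, emit_info, block_order, constants, VCodeBuildDirection::Backward,
/// );
/// // Within each lowered block, instructions are pushed in reverse
/// // program order, successors are registered, and the block is closed:
/// builder.push(last_inst);
/// builder.push(first_inst);
/// builder.add_succ(succ_block, &branch_args);
/// builder.end_bb();
/// // ... repeat for the remaining blocks, then build the final VCode:
/// let vcode = builder.build(allocatable_regs, vregs);
/// ```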
pub struct VCodeBuilder<I: VCodeInst> {
    /// In-progress VCode.
    pub(crate) vcode: VCode<I>,

    /// In what direction is the build occurring?
    direction: VCodeBuildDirection,

    /// Index of the last block-start in the vcode.
    block_start: usize,

    /// Start of succs for the current block in the concatenated succs list.
    succ_start: usize,

    /// Start of blockparams for the current block in the concatenated
    /// blockparams list.
    block_params_start: usize,

    /// Start of successor blockparam arg list entries in
    /// the concatenated branch_block_arg_range list.
    branch_block_arg_succ_start: usize,

    /// Current source location.
    cur_srcloc: RelSourceLoc,

    /// Debug-value label in-progress map, keyed by label. For each
    /// label, we keep disjoint ranges mapping to vregs. We'll flatten
    /// this into (vreg, range, label) tuples when done.
    debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
}

/// Direction in which a VCodeBuilder builds VCode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VCodeBuildDirection {
    // TODO: add `Forward` once we need it and can test it adequately.
    /// Backward-build pass: we expect the producer to call `emit()`
    /// with instructions in reverse program order within each block.
    Backward,
}

impl<I: VCodeInst> VCodeBuilder<I> {
    /// Create a new VCodeBuilder.
    pub fn new(
        sigs: SigSet,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        constants: VCodeConstants,
        direction: VCodeBuildDirection,
    ) -> VCodeBuilder<I> {
        let vcode = VCode::new(sigs, abi, emit_info, block_order, constants);

        VCodeBuilder {
            vcode,
            direction,
            block_start: 0,
            succ_start: 0,
            block_params_start: 0,
            branch_block_arg_succ_start: 0,
            cur_srcloc: Default::default(),
            debug_info: FxHashMap::default(),
        }
    }

    /// Initialize the ABI object, providing the temporary registers
    /// allocated for its use during lowering.
    pub fn init_abi(&mut self, temps: Vec<Writable<Reg>>) {
        self.vcode.abi.init(&self.vcode.sigs, temps);
    }

    /// Access the ABI object.
    pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
        &self.vcode.abi
    }

    /// Access the ABI object mutably.
    pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
        &mut self.vcode.abi
    }

    /// Access the set of ABI signatures.
    pub fn sigs(&self) -> &SigSet {
        &self.vcode.sigs
    }

    /// Access the set of ABI signatures mutably.
    pub fn sigs_mut(&mut self) -> &mut SigSet {
        &mut self.vcode.sigs
    }

    /// Access to the BlockLoweringOrder object.
    pub fn block_order(&self) -> &BlockLoweringOrder {
        &self.vcode.block_order
    }

    /// Set the current block as the entry block.
    pub fn set_entry(&mut self, block: BlockIndex) {
        self.vcode.entry = block;
    }

    /// End the current basic block. Must be called after emitting vcode insts
    /// for IR insts and prior to ending the function (building the VCode).
    pub fn end_bb(&mut self) {
        let start_idx = self.block_start;
        let end_idx = self.vcode.insts.len();
        self.block_start = end_idx;
        // Add the instruction index range to the list of blocks.
        self.vcode
            .block_ranges
            .push((InsnIndex::new(start_idx), InsnIndex::new(end_idx)));
        // End the successors list.
        let succ_end = self.vcode.block_succs_preds.len();
        self.vcode
            .block_succ_range
            .push((self.succ_start as u32, succ_end as u32));
        self.succ_start = succ_end;
        // End the blockparams list.
        let block_params_end = self.vcode.block_params.len();
        self.vcode
            .block_params_range
            .push((self.block_params_start as u32, block_params_end as u32));
        self.block_params_start = block_params_end;
        // End the branch blockparam args list.
        let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
        self.vcode.branch_block_arg_succ_range.push((
            self.branch_block_arg_succ_start as u32,
            branch_block_arg_succ_end as u32,
        ));
        self.branch_block_arg_succ_start = branch_block_arg_succ_end;
    }

    /// Add a block parameter (virtual register) for the current block.
    pub fn add_block_param(&mut self, param: VirtualReg) {
        self.vcode.block_params.push(param.into());
    }

    fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
        let start = self.vcode.branch_block_args.len();
        self.vcode
            .branch_block_args
            .extend(args.iter().map(|&arg| VReg::from(arg)));
        let end = self.vcode.branch_block_args.len();
        self.vcode
            .branch_block_arg_range
            .push((start as u32, end as u32));
    }

    /// Push an instruction for the current BB and current IR inst
    /// within the BB.
    pub fn push(&mut self, insn: I) {
        self.vcode.insts.push(insn);
        self.vcode.srclocs.push(self.cur_srcloc);
    }

    /// Add a successor block with branch args.
    pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
        self.vcode.block_succs_preds.push(block);
        self.add_branch_args_for_succ(args);
    }

    /// Set the current source location.
    pub fn set_srcloc(&mut self, srcloc: RelSourceLoc) {
        self.cur_srcloc = srcloc;
    }

    /// Add a debug value label to a register.
    pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
        // We'll fix up labels in reverse(). Because we're generating
        // code bottom-to-top, the liverange of the label goes *from*
        // the last index at which it was defined (or 0, which
        // corresponds to the end of the eventual function) *to* just
        // this instruction, and no further.
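        //
        // Illustrative example (hypothetical indices): if this label was last
        // recorded with a range ending at inst 5 and we now see a def at inst
        // 12, we push (5, 12, vreg) here; `reverse_and_finalize()` later
        // translates these reversed endpoints into forward-order indices.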
        let inst = InsnIndex::new(self.vcode.insts.len());
        let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
        let last = labels
            .last()
            .map(|(_start, end, _vreg)| *end)
            .unwrap_or(InsnIndex::new(0));
        labels.push((last, inst, reg.into()));
    }

    /// Set a vreg alias: references to `from` will be renamed to the
    /// (already-resolved) target of `to` when operands are collected.
    pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
        let from = from.into();
        let resolved_to = self.resolve_vreg_alias(to.into());
        // Disallow cycles (see below).
        assert_ne!(resolved_to, from);
        self.vcode.vreg_aliases.insert(from, resolved_to);
    }

    /// Resolve a vreg through the alias table, returning the final target.
    pub fn resolve_vreg_alias(&self, from: regalloc2::VReg) -> regalloc2::VReg {
        Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, from)
    }

    fn resolve_vreg_alias_impl(
        aliases: &FxHashMap<regalloc2::VReg, regalloc2::VReg>,
        from: regalloc2::VReg,
    ) -> regalloc2::VReg {
        // We prevent cycles from existing by resolving targets of
        // aliases eagerly before setting them. If the target resolves
        // to the origin of the alias, then a cycle would be created
        // and the alias is disallowed. Because of the structure of
        // SSA code (one instruction can refer to another's defs but
        // not vice-versa, except indirectly through
        // phis/blockparams), cycles should not occur as we use
        // aliases to redirect vregs to the temps that actually define
        // them.

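        // For example (hypothetical vregs): inserting v10 -> v20 and then
        // v20 -> v30 leaves the table as {v10: v20, v20: v30}; resolving v10
        // here walks v10 -> v20 -> v30. A later attempt to insert v30 -> v10
        // would first resolve v10 to v30 and trip the cycle assertion in
        // `set_vreg_alias`.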
        let mut vreg = from;
        while let Some(to) = aliases.get(&vreg) {
            vreg = *to;
        }
        vreg
    }

    /// Access the constants.
    pub fn constants(&mut self) -> &mut VCodeConstants {
        &mut self.vcode.constants
    }

    fn compute_preds_from_succs(&mut self) {
        // Compute predecessors from successors. In order to gather
        // all preds for a block into a contiguous sequence, we build
        // a list of (succ, pred) tuples and then sort.
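        //
        // For example (hypothetical blocks): successor edges {b0 -> b1,
        // b0 -> b2, b1 -> b2} produce tuples [(b1, b0), (b2, b0), (b2, b1)];
        // after sorting by successor, b2's predecessors [b0, b1] sit in one
        // contiguous run and can be recorded as a single range.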
        let mut succ_pred_edges: Vec<(BlockIndex, BlockIndex)> =
            Vec::with_capacity(self.vcode.block_succs_preds.len());
        for (pred, &(start, end)) in self.vcode.block_succ_range.iter().enumerate() {
            let pred = BlockIndex::new(pred);
            for i in start..end {
                let succ = BlockIndex::new(self.vcode.block_succs_preds[i as usize].index());
                succ_pred_edges.push((succ, pred));
            }
        }
        succ_pred_edges.sort_unstable();

        let mut i = 0;
        for succ in 0..self.vcode.num_blocks() {
            let succ = BlockIndex::new(succ);
            let start = self.vcode.block_succs_preds.len();
            while i < succ_pred_edges.len() && succ_pred_edges[i].0 == succ {
                let pred = succ_pred_edges[i].1;
                self.vcode.block_succs_preds.push(pred);
                i += 1;
            }
            let end = self.vcode.block_succs_preds.len();
            self.vcode.block_pred_range.push((start as u32, end as u32));
        }
    }

    /// Called once, when a build in Backward order is complete, to
    /// perform the overall reversal (into final forward order) and
    /// finalize metadata accordingly.
    fn reverse_and_finalize(&mut self) {
        let n_insts = self.vcode.insts.len();
        if n_insts == 0 {
            return;
        }

        // Reverse the per-block and per-inst sequences.
        self.vcode.block_ranges.reverse();
        // block_params_range is indexed by block (and blocks were
        // traversed in reverse) so we reverse it; but block-param
        // sequences in the concatenated vec can remain in reverse
        // order (it is effectively an arena of arbitrarily-placed
        // referenced sequences).
        self.vcode.block_params_range.reverse();
        // Likewise, we reverse block_succ_range, but the block_succ
        // concatenated array can remain as-is.
        self.vcode.block_succ_range.reverse();
        self.vcode.insts.reverse();
        self.vcode.srclocs.reverse();
        // Likewise, branch_block_arg_succ_range is indexed by block
        // so must be reversed.
        self.vcode.branch_block_arg_succ_range.reverse();

        // To translate an instruction index *endpoint* in reversed
        // order to forward order, compute `n_insts - i`.
        //
        // Why not `n_insts - 1 - i`? That would be correct to
        // translate an individual instruction index (for ten insts 0
        // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
        // 0). But for the usual inclusive-start, exclusive-end range
        // idiom, inclusive starts become exclusive ends and
        // vice-versa, so e.g. an (inclusive) start of 0 becomes an
        // (exclusive) end of 10.
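        //
        // Concretely (hypothetical numbers): with `n_insts` = 10, a block
        // stored with reversed-order range (2, 5) becomes
        // (translate(5), translate(2)) = (5, 8) in forward order.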
        let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());

        // Edit the block-range instruction indices.
        for tuple in &mut self.vcode.block_ranges {
            let (start, end) = *tuple;
            *tuple = (translate(end), translate(start)); // Note reversed order.
        }

        // Generate debug-value labels based on per-label maps.
        for (label, tuples) in &self.debug_info {
            for &(start, end, vreg) in tuples {
                let vreg = self.resolve_vreg_alias(vreg);
                let fwd_start = translate(end);
                let fwd_end = translate(start);
                self.vcode
                    .debug_value_labels
                    .push((vreg, fwd_start, fwd_end, label.as_u32()));
            }
        }

        // Now sort debug value labels by VReg, as required
        // by regalloc2.
        self.vcode
            .debug_value_labels
            .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
    }

    fn collect_operands(&mut self, allocatable: PRegSet) {
        for (i, insn) in self.vcode.insts.iter().enumerate() {
            // Push operands from the instruction onto the operand list.
            //
            // We rename through the vreg alias table as we collect
            // the operands. This is better than a separate post-pass
            // over operands, because it has more cache locality:
            // operands only need to pass through L1 once. This is
            // also better than renaming instructions'
            // operands/registers while lowering, because here we only
            // need to do the `match` over the instruction to visit
            // its register fields (which is slow, branchy code) once.

            let vreg_aliases = &self.vcode.vreg_aliases;
            let mut op_collector =
                OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
                    Self::resolve_vreg_alias_impl(vreg_aliases, vreg)
                });
            insn.get_operands(&mut op_collector);
            let (ops, clobbers) = op_collector.finish();
            self.vcode.operand_ranges.push(ops);

            if clobbers != PRegSet::default() {
                self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
            }

            if let Some((dst, src)) = insn.is_move() {
                // We should never see non-virtual registers present in move
                // instructions.
                assert!(
                    src.is_virtual(),
                    "the real register {:?} was used as the source of a move instruction",
                    src
                );
                assert!(
                    dst.to_reg().is_virtual(),
                    "the real register {:?} was used as the destination of a move instruction",
                    dst.to_reg()
                );

                let src = Operand::reg_use(Self::resolve_vreg_alias_impl(vreg_aliases, src.into()));
                let dst = Operand::reg_def(Self::resolve_vreg_alias_impl(
                    vreg_aliases,
                    dst.to_reg().into(),
                ));

                // Note that regalloc2 requires these in (src, dst) order.
                self.vcode.is_move.insert(InsnIndex::new(i), (src, dst));
            }
        }

        // Translate blockparam args via the vreg aliases table as well.
        for arg in &mut self.vcode.branch_block_args {
            let new_arg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *arg);
            trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
            *arg = new_arg;
        }
    }

    /// Build the final VCode.
    pub fn build(mut self, allocatable: PRegSet, vregs: VRegAllocator<I>) -> VCode<I> {
        self.vcode.vreg_types = vregs.vreg_types;
        self.vcode.reftyped_vregs = vregs.reftyped_vregs;

        if self.direction == VCodeBuildDirection::Backward {
            self.reverse_and_finalize();
        }
        self.collect_operands(allocatable);

        // Apply register aliases to the `reftyped_vregs` list since this list
        // will be returned directly to `regalloc2` eventually and all
        // operands/results of instructions will use the alias-resolved vregs
        // from `regalloc2`'s perspective.
        //
        // Also note that `reftyped_vregs` can't have duplicates, so after the
        // aliases are applied duplicates are removed.
        for reg in self.vcode.reftyped_vregs.iter_mut() {
            *reg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *reg);
        }
        self.vcode.reftyped_vregs.sort();
        self.vcode.reftyped_vregs.dedup();

        self.compute_preds_from_succs();
        self.vcode.debug_value_labels.sort_unstable();
        self.vcode
    }
}

/// Is this type a reference type?
fn is_reftype(ty: Type) -> bool {
    ty == types::R64 || ty == types::R32
}

impl<I: VCodeInst> VCode<I> {
    /// New empty VCode.
    fn new(
        sigs: SigSet,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        constants: VCodeConstants,
    ) -> VCode<I> {
        let n_blocks = block_order.lowered_order().len();
        VCode {
            sigs,
            vreg_types: vec![],
            insts: Vec::with_capacity(10 * n_blocks),
            operands: Vec::with_capacity(30 * n_blocks),
            operand_ranges: Vec::with_capacity(10 * n_blocks),
            clobbers: FxHashMap::default(),
            is_move: FxHashMap::default(),
            srclocs: Vec::with_capacity(10 * n_blocks),
            entry: BlockIndex::new(0),
            block_ranges: Vec::with_capacity(n_blocks),
            block_succ_range: Vec::with_capacity(n_blocks),
            block_succs_preds: Vec::with_capacity(2 * n_blocks),
            block_pred_range: Vec::with_capacity(n_blocks),
            block_params_range: Vec::with_capacity(n_blocks),
            block_params: Vec::with_capacity(5 * n_blocks),
            branch_block_args: Vec::with_capacity(10 * n_blocks),
            branch_block_arg_range: Vec::with_capacity(2 * n_blocks),
            branch_block_arg_succ_range: Vec::with_capacity(n_blocks),
            block_order,
            abi,
            emit_info,
            reftyped_vregs: vec![],
            constants,
            debug_value_labels: vec![],
            vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()),
        }
    }

    /// Get the number of blocks. Block indices will be in the range `0 ..
    /// self.num_blocks()`.
    pub fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    /// The number of lowered instructions.
    pub fn num_insts(&self) -> usize {
        self.insts.len()
    }

    /// Get the successors for a block.
    pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
        let (start, end) = self.block_succ_range[block.index()];
        &self.block_succs_preds[start as usize..end as usize]
    }

    fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
        // Compute clobbered registers.
        let mut clobbered = vec![];
        let mut clobbered_set = FxHashSet::default();

        // All moves are included in clobbers.
        for edit in &regalloc.edits {
            let Edit::Move { to, .. } = edit.1;
            if let Some(preg) = to.as_reg() {
                let reg = RealReg::from(preg);
                if clobbered_set.insert(reg) {
                    clobbered.push(Writable::from_reg(reg));
                }
            }
        }

        for (i, (start, end)) in self.operand_ranges.iter().enumerate() {
            // Skip this instruction if not "included in clobbers" as
            // per the MachInst. (Some backends use this to implement
            // ABI specifics; e.g., excluding calls of the same ABI as
            // the current function from clobbers, because by
            // definition everything clobbered by the call can be
            // clobbered by this function without saving as well.)
            if !self.insts[i].is_included_in_clobbers() {
                continue;
            }

            let start = *start as usize;
            let end = *end as usize;
            let operands = &self.operands[start..end];
            let allocs = &regalloc.allocs[start..end];
            for (operand, alloc) in operands.iter().zip(allocs.iter()) {
                // We're interested only in writes (Mods or Defs).
                if operand.kind() == OperandKind::Use {
                    continue;
                }
                if let Some(preg) = alloc.as_reg() {
                    let reg = RealReg::from(preg);
                    if clobbered_set.insert(reg) {
                        clobbered.push(Writable::from_reg(reg));
                    }
                }
            }

            // Also add explicitly-clobbered registers.
            for preg in self
                .clobbers
                .get(&InsnIndex::new(i))
                .cloned()
                .unwrap_or_default()
            {
                let reg = RealReg::from(preg);
                if clobbered_set.insert(reg) {
                    clobbered.push(Writable::from_reg(reg));
                }
            }
        }

        clobbered
    }

    /// Emit the instructions to a `MachBuffer`, containing fixed-up
    /// code and external reloc/trap/etc. records ready for use. Takes
    /// the regalloc results as well.
    ///
    /// Returns the machine code itself, and optionally metadata
    /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
    /// is consumed by the emission process.
    pub fn emit(
        mut self,
        regalloc: &regalloc2::Output,
        want_disasm: bool,
        want_metadata: bool,
    ) -> EmitResult<I>
    where
        I: VCodeInst,
    {
        // To write into disasm string.
        use core::fmt::Write;

        let _tt = timing::vcode_emit();
        let mut buffer = MachBuffer::new();
        let mut bb_starts: Vec<Option<CodeOffset>> = vec![];

        // The first M MachLabels are reserved for block indices, the next N MachLabels for
        // constants.
        buffer.reserve_labels_for_blocks(self.num_blocks());
        buffer.reserve_labels_for_constants(&self.constants);

        // Construct the final order we emit code in: cold blocks at the end.
        let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
        let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
        for block in 0..self.num_blocks() {
            let block = BlockIndex::new(block);
            if self.block_order.is_cold(block) {
                cold_blocks.push(block);
            } else {
                final_order.push(block);
            }
        }
        final_order.extend(cold_blocks.clone());

        // Compute/save info we need for the prologue: clobbers and
        // number of spillslots.
        //
        // We clone `abi` here because we will mutate it as we
        // generate the prologue and set other info, but we can't
        // mutate `VCode`. The info it usually carries prior to
        // setting clobbers is fairly minimal so this should be
        // relatively cheap.
        let clobbers = self.compute_clobbers(regalloc);
        self.abi.set_num_spillslots(regalloc.num_spillslots);
        self.abi.set_clobbered(clobbers);

        // We need to generate the prologue in order to get the ABI
        // object into the right state first. We'll emit it when we
        // hit the right block below.
        let prologue_insts = self.abi.gen_prologue(&self.sigs);

        // Emit blocks.
        let mut cur_srcloc = None;
        let mut last_offset = None;
        let mut inst_offsets = vec![];
        let mut state = I::State::new(&self.abi);

        let mut disasm = String::new();

        if !self.debug_value_labels.is_empty() {
            inst_offsets.resize(self.insts.len(), 0);
        }

        // Count edits per block ahead of time; this is needed for
        // lookahead island emission. (We could derive it per-block
        // with binary search in the edit list, but it's more
        // efficient to do it in one pass here.)
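        //
        // (E.g., with hypothetical numbers: blocks whose ranges end at insts
        // 4 and 9, and regalloc edits at insts 2, 3, and 7, would yield
        // per-block edit counts of [2, 1].)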
        let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
        let mut edit_idx = 0;
        for block in 0..self.num_blocks() {
            let end_inst = self.block_ranges[block].1;
            let start_edit_idx = edit_idx;
            while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
                edit_idx += 1;
            }
            let end_edit_idx = edit_idx;
            ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
        }

        let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();

        for (block_order_idx, &block) in final_order.iter().enumerate() {
            trace!("emitting block {:?}", block);
            let new_offset = I::align_basic_block(buffer.cur_offset());
            while new_offset > buffer.cur_offset() {
                // Pad with NOPs up to the aligned block offset.
                let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
                nop.emit(&[], &mut buffer, &self.emit_info, &mut Default::default());
            }
            assert_eq!(buffer.cur_offset(), new_offset);

            let do_emit = |inst: &I,
                           allocs: &[Allocation],
                           disasm: &mut String,
                           buffer: &mut MachBuffer<I>,
                           state: &mut I::State| {
                if want_disasm && !inst.is_args() {
                    let mut s = state.clone();
                    writeln!(disasm, "  {}", inst.pretty_print_inst(allocs, &mut s)).unwrap();
                }
                inst.emit(allocs, buffer, &self.emit_info, state);
            };

            // Is this the first block? Emit the prologue directly if so.
            if block == self.entry {
                trace!(" -> entry block");
                buffer.start_srcloc(Default::default());
                state.pre_sourceloc(Default::default());
                for inst in &prologue_insts {
                    do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state);
                }
                buffer.end_srcloc();
            }

            // Now emit the regular block body.

            buffer.bind_label(MachLabel::from_block(block));

            if want_disasm {
                writeln!(&mut disasm, "block{}:", block.index()).unwrap();
            }

            if want_metadata {
                // Track BB starts. If we have backed up due to MachBuffer
                // branch opts, mark the blocks we backed up over as removed
                // (they no longer have a start offset).
                let cur_offset = buffer.cur_offset();
                if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
                    for i in (0..bb_starts.len()).rev() {
                        if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
                            break;
                        }
                        bb_starts[i] = None;
                    }
                }
                bb_starts.push(Some(cur_offset));
                last_offset = Some(cur_offset);
            }

            if let Some(block_start) = I::gen_block_start(
                self.block_order.is_indirect_branch_target(block),
                is_forward_edge_cfi_enabled,
            ) {
                do_emit(&block_start, &[], &mut disasm, &mut buffer, &mut state);
            }

            for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
                match inst_or_edit {
                    InstOrEdit::Inst(iix) => {
                        if !self.debug_value_labels.is_empty() {
                            // If we need to produce debug info,
                            // record the offset of each instruction
                            // so that we can translate value-label
                            // ranges to machine-code offsets.

                            // Cold blocks violate monotonicity
                            // assumptions elsewhere (that
                            // instructions in inst-index order are in
                            // order in machine code), so we omit
                            // their offsets here. Value-label range
                            // generation below will skip empty ranges
                            // and ranges with to-offsets of zero.
                            if !self.block_order.is_cold(block) {
                                inst_offsets[iix.index()] = buffer.cur_offset();
                            }
                        }

                        if self.insts[iix.index()].is_move().is_some() {
                            // Skip moves in the pre-regalloc program;
                            // all of these are incorporated by the
                            // regalloc into its unified move handling
                            // and they come out the other end, if
                            // still needed (not elided), as
                            // regalloc-inserted moves.
                            continue;
                        }

                        // Update the srcloc at this point in the buffer.
                        let srcloc = self.srclocs[iix.index()];
                        if cur_srcloc != Some(srcloc) {
                            if cur_srcloc.is_some() {
                                buffer.end_srcloc();
                            }
                            buffer.start_srcloc(srcloc);
                            cur_srcloc = Some(srcloc);
                        }
                        state.pre_sourceloc(cur_srcloc.unwrap_or_default());

                        // If this is a safepoint, compute a stack map
                        // and pass it to the emit state.
                        if self.insts[iix.index()].is_safepoint() {
                            let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![];
                            // Find the contiguous range of
                            // (progpoint, allocation) safepoint slot
                            // records in `regalloc.safepoint_slots`
                            // for this instruction index.
                            let safepoint_slots_start = regalloc
                                .safepoint_slots
                                .binary_search_by(|(progpoint, _alloc)| {
                                    if progpoint.inst() >= iix {
                                        std::cmp::Ordering::Greater
                                    } else {
                                        std::cmp::Ordering::Less
                                    }
                                })
                                .unwrap_err();

                            for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..]
                                .iter()
                                .take_while(|(progpoint, _)| progpoint.inst() == iix)
                            {
                                let slot = alloc.as_stack().unwrap();
                                safepoint_slots.push(slot);
                            }
                            if !safepoint_slots.is_empty() {
                                let stack_map = self
                                    .abi
                                    .spillslots_to_stack_map(&safepoint_slots[..], &state);
                                state.pre_safepoint(stack_map);
                            }
                        }

                        // Get the allocations for this inst from the regalloc result.
                        let allocs = regalloc.inst_allocs(iix);

                        // If the instruction we are about to emit is
                        // a return, place an epilogue at this point
                        // (and don't emit the return; the actual
                        // epilogue will contain it).
                        if self.insts[iix.index()].is_term() == MachTerminator::Ret {
                            for inst in self.abi.gen_epilogue() {
                                do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state);
                            }
                        } else {
                            // Emit the instruction!
                            do_emit(
                                &self.insts[iix.index()],
                                allocs,
                                &mut disasm,
                                &mut buffer,
                                &mut state,
                            );
                        }
                    }

                    InstOrEdit::Edit(Edit::Move { from, to }) => {
                        // Create a move/spill/reload instruction and
                        // immediately emit it.
                        match (from.as_reg(), to.as_reg()) {
                            (Some(from), Some(to)) => {
                                // Reg-to-reg move.
                                let from_rreg = Reg::from(from);
                                let to_rreg = Writable::from_reg(Reg::from(to));
                                debug_assert_eq!(from.class(), to.class());
                                let ty = I::canonical_type_for_rc(from.class());
                                let mv = I::gen_move(to_rreg, from_rreg, ty);
                                do_emit(&mv, &[], &mut disasm, &mut buffer, &mut state);
                            }
                            (Some(from), None) => {
                                // Spill from register to spillslot.
                                let to = to.as_stack().unwrap();
                                let from_rreg = RealReg::from(from);
                                let spill = self.abi.gen_spill(to, from_rreg);
                                do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state);
                            }
                            (None, Some(to)) => {
                                // Load from spillslot to register.
                                let from = from.as_stack().unwrap();
                                let to_rreg = Writable::from_reg(RealReg::from(to));
                                let reload = self.abi.gen_reload(to_rreg, from);
                                do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state);
                            }
                            (None, None) => {
                                panic!("regalloc2 should have eliminated stack-to-stack moves!");
                            }
                        }
                    }
                }
            }

            if cur_srcloc.is_some() {
                buffer.end_srcloc();
                cur_srcloc = None;
            }

            // Do we need an island? Get the worst-case size of the
            // next BB and see if, having emitted that many bytes, we
            // will be beyond the deadline.
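            //
            // (E.g., with hypothetical numbers: a next block of 12 insts plus
            // 3 regalloc edits, at a worst-case size of 16 bytes per inst,
            // reserves (12 + 3) * 16 = 240 bytes when checking whether an
            // island must be emitted now.)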
            if block_order_idx < final_order.len() - 1 {
                let next_block = final_order[block_order_idx + 1];
                let next_block_range = self.block_ranges[next_block.index()];
                let next_block_size =
                    (next_block_range.1.index() - next_block_range.0.index()) as u32;
                let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
                let worst_case_next_bb =
                    I::worst_case_size() * (next_block_size + next_block_ra_insertions);
                if buffer.island_needed(worst_case_next_bb) {
                    buffer.emit_island(worst_case_next_bb);
                }
            }
        }

        // Emit the constants used by the function.
        let mut alignment = 1;
        for (constant, data) in self.constants.iter() {
            alignment = data.alignment().max(alignment);

            let label = buffer.get_label_for_constant(constant);
            buffer.defer_constant(label, data.alignment(), data.as_slice(), u32::max_value());
        }

        let func_body_len = buffer.cur_offset();

        // Create `bb_edges` and final (filtered) `bb_starts`.
        let mut bb_edges = vec![];
        let mut bb_offsets = vec![];
        if want_metadata {
            for block in 0..self.num_blocks() {
                if bb_starts[block].is_none() {
                    // Block was deleted by MachBuffer; skip.
                    continue;
                }
                let from = bb_starts[block].unwrap();

                bb_offsets.push(from);
                // Resolve each `succ` label and add edges.
                let succs = self.block_succs(BlockIndex::new(block));
                for &succ in succs.iter() {
                    let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
                    bb_edges.push((from, to));
                }
            }
        }

        let value_labels_ranges =
            self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
        let frame_size = self.abi.frame_size();

        EmitResult {
            buffer,
            bb_offsets,
            bb_edges,
            inst_offsets,
            func_body_len,
            disasm: if want_disasm { Some(disasm) } else { None },
            sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
            dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
            value_labels_ranges,
            frame_size,
            alignment,
        }
    }

    fn compute_value_labels_ranges(
        &self,
        regalloc: &regalloc2::Output,
        inst_offsets: &[CodeOffset],
        func_body_len: u32,
    ) -> ValueLabelsRanges {
        if self.debug_value_labels.is_empty() {
            return ValueLabelsRanges::default();
        }

        let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
        for &(label, from, to, alloc) in &regalloc.debug_locations {
            let ranges = value_labels_ranges
                .entry(ValueLabel::from_u32(label))
                .or_insert_with(|| vec![]);
            let from_offset = inst_offsets[from.inst().index()];
            let to_offset = if to.inst().index() == inst_offsets.len() {
                func_body_len
            } else {
                inst_offsets[to.inst().index()]
            };

            // Empty range or to-offset of zero can happen because of
            // cold blocks (see above).
            if to_offset == 0 || from_offset == to_offset {
                continue;
            }

            let loc = if let Some(preg) = alloc.as_reg() {
                LabelValueLoc::Reg(Reg::from(preg))
            } else {
                // We can't translate spillslot locations at the
                // moment because ValueLabelLoc requires an
                // instantaneous SP offset, and this can *change*
                // within the range we have here because of callsites
                // adjusting SP temporarily. To avoid the complexity
                // of accurately plumbing through nominal-SP
                // adjustment sites, we just omit debug info for
                // values that are spilled. Not ideal, but debug info
                // is best-effort.
                continue;
            };

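            // For instance (hypothetical offsets): if the `from` inst starts
            // at offset 8 and the exclusive-end `to` inst starts at offset 20,
            // the range recorded below is start = 9, end = 21, covering
            // machine-code offsets 9 through 20 inclusive.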
1156            ranges.push(ValueLocRange {
1157                loc,
1158                // ValueLocRanges are recorded by *instruction-end
1159                // offset*. `from_offset` is the *start* of the
1160                // instruction; that is the same as the end of another
1161                // instruction, so we only want to begin coverage once
1162                // we are past the previous instruction's end.
1163                start: from_offset + 1,
1164                // Likewise, `end` is exclusive, but we want to
1165                // *include* the end of the last
1166                // instruction. `to_offset` is the start of the
1167                // `to`-instruction, which is the exclusive end, i.e.,
1168                // the first instruction not covered. That
1169                // instruction's start is the same as the end of the
1170                // last instruction that is included, so we go one
1171                // byte further to be sure to include it.
1172                end: to_offset + 1,
1173            });
1174        }
1175
1176        value_labels_ranges
1177    }
1178
1179    /// Get the IR block for a BlockIndex, if one exists.
1180    pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1181        self.block_order.lowered_order()[block.index()].orig_block()
1182    }
1183
1184    #[inline]
1185    fn assert_no_vreg_aliases<'a>(&self, list: &'a [VReg]) -> &'a [VReg] {
1186        for vreg in list {
1187            self.assert_not_vreg_alias(*vreg);
1188        }
1189        list
1190    }
1191
1192    #[inline]
1193    fn assert_not_vreg_alias(&self, vreg: VReg) -> VReg {
1194        debug_assert!(VCodeBuilder::<I>::resolve_vreg_alias_impl(&self.vreg_aliases, vreg) == vreg);
1195        vreg
1196    }
1197
1198    #[inline]
    fn assert_operand_not_vreg_alias(&self, op: Operand) -> Operand {
        // It should be true by construction that `Operand`s do not contain any
        // aliased vregs since they're all collected and mapped when the VCode
        // is itself constructed.
        self.assert_not_vreg_alias(op.vreg());
        op
    }
}

impl<I: VCodeInst> RegallocFunction for VCode<I> {
    fn num_insts(&self) -> usize {
        self.insts.len()
    }

    fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    fn entry_block(&self) -> BlockIndex {
        self.entry
    }

    fn block_insns(&self, block: BlockIndex) -> InstRange {
        let (start, end) = self.block_ranges[block.index()];
        InstRange::forward(start, end)
    }

    fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
        let (start, end) = self.block_succ_range[block.index()];
        &self.block_succs_preds[start as usize..end as usize]
    }

    fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
        let (start, end) = self.block_pred_range[block.index()];
        &self.block_succs_preds[start as usize..end as usize]
    }

    fn block_params(&self, block: BlockIndex) -> &[VReg] {
        // As a special case we don't return block params for the entry block, as all the arguments
        // will be defined by the `Inst::Args` instruction.
        if block == self.entry {
            return &[];
        }

        let (start, end) = self.block_params_range[block.index()];
        let ret = &self.block_params[start as usize..end as usize];
        // Currently block params are never aliased to another vreg, but
        // double-check just to be sure.
        self.assert_no_vreg_aliases(ret)
    }

    fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
        let (succ_range_start, succ_range_end) = self.branch_block_arg_succ_range[block.index()];
        let succ_ranges =
            &self.branch_block_arg_range[succ_range_start as usize..succ_range_end as usize];
        let (branch_block_args_start, branch_block_args_end) = succ_ranges[succ_idx];
        let ret = &self.branch_block_args
            [branch_block_args_start as usize..branch_block_args_end as usize];
        self.assert_no_vreg_aliases(ret)
    }

    fn is_ret(&self, insn: InsnIndex) -> bool {
        match self.insts[insn.index()].is_term() {
            // We treat blocks terminated by an unconditional trap like a return for regalloc.
            MachTerminator::None => self.insts[insn.index()].is_trap(),
            MachTerminator::Ret => true,
            _ => false,
        }
    }

    fn is_branch(&self, insn: InsnIndex) -> bool {
        match self.insts[insn.index()].is_term() {
            MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
            _ => false,
        }
    }

    fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool {
        self.insts[insn.index()].is_safepoint()
    }

    fn is_move(&self, insn: InsnIndex) -> Option<(Operand, Operand)> {
        let (a, b) = self.is_move.get(&insn)?;
        Some((
            self.assert_operand_not_vreg_alias(*a),
            self.assert_operand_not_vreg_alias(*b),
        ))
    }

    fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
        let (start, end) = self.operand_ranges[insn.index()];
        let ret = &self.operands[start as usize..end as usize];
        for op in ret {
            self.assert_operand_not_vreg_alias(*op);
        }
        ret
    }

    fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
        self.clobbers.get(&insn).cloned().unwrap_or_default()
    }

    fn num_vregs(&self) -> usize {
        std::cmp::max(self.vreg_types.len(), first_user_vreg_index())
    }

    fn reftype_vregs(&self) -> &[VReg] {
        self.assert_no_vreg_aliases(&self.reftyped_vregs[..])
    }

    fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
        // VRegs here are inserted into `debug_value_labels` after code is
        // generated and aliases are fully defined, so no aliases should
        // remain; double-check that none are lingering.
        for (vreg, ..) in self.debug_value_labels.iter() {
            self.assert_not_vreg_alias(*vreg);
        }
        &self.debug_value_labels[..]
    }

    fn spillslot_size(&self, regclass: RegClass) -> usize {
        self.abi.get_spillslot_size(regclass) as usize
    }

    fn allow_multiple_vreg_defs(&self) -> bool {
        // At least the s390x backend requires this, because the
        // `Loop` pseudo-instruction aggregates all Operands so pinned
        // vregs (RealRegs) may occur more than once.
        true
    }
}

impl<I: VCodeInst> fmt::Debug for VCode<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VCode {{")?;
        writeln!(f, "  Entry block: {}", self.entry.index())?;

        let mut state = Default::default();

        let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
        alias_keys.sort_unstable();
        for key in alias_keys {
            let dest = self.vreg_aliases.get(&key).unwrap();
            writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
        }

        for block in 0..self.num_blocks() {
            let block = BlockIndex::new(block);
            writeln!(f, "Block {}:", block.index())?;
            if let Some(bb) = self.bindex_to_bb(block) {
                writeln!(f, "    (original IR block: {})", bb)?;
            }
            for succ in self.succs(block) {
                writeln!(f, "    (successor: Block {})", succ.index())?;
            }
            let (start, end) = self.block_ranges[block.index()];
            writeln!(
                f,
                "    (instruction range: {} .. {})",
                start.index(),
                end.index()
            )?;
            for inst in start.index()..end.index() {
                writeln!(
                    f,
                    "  Inst {}: {}",
                    inst,
                    self.insts[inst].pretty_print_inst(&[], &mut state)
                )?;
            }
        }

        writeln!(f, "}}")?;
        Ok(())
    }
}

/// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
pub struct VRegAllocator<I> {
    /// Next virtual register number to allocate.
    next_vreg: usize,

    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// A set with the same contents as `reftyped_vregs`, in order to
    /// avoid inserting more than once.
    reftyped_vregs_set: FxHashSet<VReg>,

    /// Reference-typed `regalloc2::VReg`s. The regalloc requires
    /// these in a dense slice (as opposed to querying the
    /// reftype-status of each vreg) for efficient iteration.
    reftyped_vregs: Vec<VReg>,

    /// The type of instruction that this allocator makes registers for.
    _inst: core::marker::PhantomData<I>,
}

impl<I: VCodeInst> VRegAllocator<I> {
    /// Make a new VRegAllocator.
    pub fn new() -> Self {
        Self {
            next_vreg: first_user_vreg_index(),
            vreg_types: vec![],
            reftyped_vregs_set: FxHashSet::default(),
            reftyped_vregs: vec![],
            _inst: core::marker::PhantomData::default(),
        }
    }

    /// Allocate a fresh ValueRegs.
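    ///
    /// A minimal usage sketch (illustrative only; `MyBackendInst` stands in
    /// for a concrete backend instruction type and is not a real type in this
    /// crate):
    ///
    /// ```ignore
    /// let mut vregs = VRegAllocator::<MyBackendInst>::new();
    /// // A 64-bit value typically occupies a single virtual register...
    /// let narrow = vregs.alloc(types::I64)?;
    /// // ...while an i128 may be split across two, depending on the
    /// // backend's `rc_for_type` implementation.
    /// let wide = vregs.alloc(types::I128)?;
    /// ```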
    pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
        let v = self.next_vreg;
        let (regclasses, tys) = I::rc_for_type(ty)?;
        self.next_vreg += regclasses.len();
        if self.next_vreg >= VReg::MAX {
            return Err(CodegenError::CodeTooLarge);
        }

        let regs: ValueRegs<Reg> = match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
            // We can extend this if/when we support 32-bit targets; e.g.,
            // an i128 on a 32-bit machine will need up to four machine regs
            // for a `Value`.
            _ => panic!("Value must reside in 1 or 2 registers"),
        };
        for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
            self.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty);
        }
        Ok(regs)
    }

    /// Set the type of this virtual register.
    pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
        if self.vreg_types.len() <= vreg.index() {
            self.vreg_types.resize(vreg.index() + 1, ir::types::INVALID);
        }
        self.vreg_types[vreg.index()] = ty;
        if is_reftype(ty) {
            let vreg: VReg = vreg.into();
            if self.reftyped_vregs_set.insert(vreg) {
                self.reftyped_vregs.push(vreg);
            }
        }
    }
}

/// This structure tracks the large constants used in VCode that will be emitted separately by the
/// [MachBuffer].
///
/// First, during the lowering phase, constants are inserted using
/// [VCodeConstants.insert]; an intermediate handle, [VCodeConstant], tracks what constants are
/// used in this phase. Some deduplication is performed, when possible, as constant
/// values are inserted.
///
/// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
/// constants so that instructions can refer to the value's memory location. The [MachBuffer]
/// then writes the constant values to the buffer.
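///
/// A minimal sketch of the lowering-phase half of that flow (illustrative
/// only; the emission-phase label assignment is handled by the `MachBuffer`
/// and is not shown):
///
/// ```ignore
/// let mut constants = VCodeConstants::with_capacity(2);
/// // Small constants are stored as fixed-size byte arrays; the byte order
/// // chosen here is arbitrary for the purposes of this example.
/// let a = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
/// let b = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
/// // `U64` (like `Pool` and `WellKnown`) data is deduplicated on insertion.
/// assert_eq!(a, b);
/// assert_eq!(constants.len(), 1);
/// ```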
#[derive(Default)]
pub struct VCodeConstants {
    constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
    pool_uses: HashMap<Constant, VCodeConstant>,
    well_known_uses: HashMap<*const [u8], VCodeConstant>,
    u64s: HashMap<[u8; 8], VCodeConstant>,
}
impl VCodeConstants {
    /// Initialize the structure with the expected number of constants.
    pub fn with_capacity(expected_num_constants: usize) -> Self {
        Self {
            constants: PrimaryMap::with_capacity(expected_num_constants),
            pool_uses: HashMap::with_capacity(expected_num_constants),
            well_known_uses: HashMap::new(),
            u64s: HashMap::new(),
        }
    }

    /// Insert a constant; using this method indicates that a constant value will be used and thus
    /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
    /// that are [VCodeConstantData::Pool], [VCodeConstantData::WellKnown], or
    /// [VCodeConstantData::U64], but not [VCodeConstantData::Generated].
    pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
        match data {
            VCodeConstantData::Generated(_) => self.constants.push(data),
            VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
                None => {
                    let vcode_constant = self.constants.push(data);
                    self.pool_uses.insert(constant, vcode_constant);
                    vcode_constant
                }
                Some(&vcode_constant) => vcode_constant,
            },
            VCodeConstantData::WellKnown(data_ref) => {
                match self.well_known_uses.entry(data_ref as *const [u8]) {
                    Entry::Vacant(v) => {
                        let vcode_constant = self.constants.push(data);
                        v.insert(vcode_constant);
                        vcode_constant
                    }
                    Entry::Occupied(o) => *o.get(),
                }
            }
            VCodeConstantData::U64(value) => match self.u64s.entry(value) {
                Entry::Vacant(v) => {
                    let vcode_constant = self.constants.push(data);
                    v.insert(vcode_constant);
                    vcode_constant
                }
                Entry::Occupied(o) => *o.get(),
            },
        }
    }

    /// Return the number of constants inserted.
    pub fn len(&self) -> usize {
        self.constants.len()
    }

    /// Iterate over the [VCodeConstant] keys inserted in this structure.
    pub fn keys(&self) -> Keys<VCodeConstant> {
        self.constants.keys()
    }

    /// Iterate over the [VCodeConstant] keys and the associated [VCodeConstantData] entries
    /// inserted in this structure.
    pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
        self.constants.iter()
    }
}

/// A use of a constant by one or more VCode instructions; see [VCodeConstants].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct VCodeConstant(u32);
entity_impl!(VCodeConstant);

/// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
/// these separately instead of as raw byte buffers allows us to avoid some duplication.
pub enum VCodeConstantData {
    /// A constant already present in the Cranelift IR
    /// [ConstantPool](crate::ir::constant::ConstantPool).
    Pool(Constant, ConstantData),
    /// A reference to a well-known constant value that is statically encoded within the compiler.
    WellKnown(&'static [u8]),
    /// A constant value generated during lowering; the value may depend on the instruction
    /// context, which makes it difficult to de-duplicate. If possible, use the other variants.
    Generated(ConstantData),
    /// A constant of at most 64 bits. These are deduplicated as
    /// well. Stored as a fixed-size array of `u8` so that we do not
    /// encounter endianness problems when cross-compiling.
    U64([u8; 8]),
}
impl VCodeConstantData {
    /// Retrieve the constant data as a byte slice.
    pub fn as_slice(&self) -> &[u8] {
        match self {
            VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
            VCodeConstantData::WellKnown(d) => d,
            VCodeConstantData::U64(value) => &value[..],
        }
    }

    /// Calculate the alignment of the constant data.
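    ///
    /// Data of eight bytes or fewer is aligned to 8; anything larger is
    /// aligned to 16. An illustrative sketch (not a doctest):
    ///
    /// ```ignore
    /// assert_eq!(VCodeConstantData::U64([0; 8]).alignment(), 8);
    /// assert_eq!(VCodeConstantData::WellKnown(&[0; 16]).alignment(), 16);
    /// ```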
    pub fn alignment(&self) -> u32 {
        if self.as_slice().len() <= 8 {
            8
        } else {
            16
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::mem::size_of;

    #[test]
    fn size_of_constant_structs() {
        assert_eq!(size_of::<Constant>(), 4);
        assert_eq!(size_of::<VCodeConstant>(), 4);
        assert_eq!(size_of::<ConstantData>(), 24);
        assert_eq!(size_of::<VCodeConstantData>(), 32);
        assert_eq!(
            size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
            24
        );
        // TODO The VCodeConstants structure's memory size could be further optimized.
        // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
        // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
    }
}