cranelift_codegen/machinst/vcode.rs
//! This implements the VCode container: a CFG of Insts that have been lowered.
//!
//! VCode is virtual-register code. An instruction in VCode is almost a machine
//! instruction; however, its register slots can refer to virtual registers in
//! addition to real machine registers.
//!
//! VCode is structured with traditional basic blocks, and
//! each block must be terminated by an unconditional branch (one target), a
//! conditional branch (two targets), or a return (no targets). Note that this
//! slightly differs from the machine code of most ISAs: in most ISAs, a
//! conditional branch has one target (and the not-taken case falls through).
//! However, we expect that machine backends will elide branches to the following
//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
//! play with layout prior to final binary emission, as well, if we want.
//!
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.
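//!
//! A rough sketch of how the pieces in this file fit together (argument
//! values are elided; see the lowering and compilation drivers in this
//! crate for the real call sites):
//!
//! ```ignore
//! // Build the VCode, ordinarily in reverse order (see `VCodeBuildDirection`).
//! let mut builder = VCodeBuilder::new(sigs, abi, emit_info, block_order, constants,
//!                                     VCodeBuildDirection::Backward);
//! builder.push(inst);           // Emit lowered instructions...
//! builder.add_succ(succ, args); // ...record successors for terminators...
//! builder.end_bb();             // ...close each basic block...
//! builder.set_entry(entry);     // ...and note the entry block.
//! let vcode = builder.build(allocatable_regs, vregs);
//!
//! // Register allocation produces a separate `regalloc2::Output`; the VCode
//! // itself is not modified. Emission consumes the VCode and the regalloc
//! // output and yields machine code plus optional disassembly/metadata.
//! let result = vcode.emit(&regalloc_output, want_disasm, want_metadata);
//! ```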

use crate::fx::FxHashMap;
use crate::fx::FxHashSet;
use crate::ir::RelSourceLoc;
use crate::ir::{self, types, Constant, ConstantData, DynamicStackSlot, LabelValueLoc, ValueLabel};
use crate::machinst::*;
use crate::timing;
use crate::trace;
use crate::CodegenError;
use crate::ValueLocRange;
use regalloc2::{
    Edit, Function as RegallocFunction, InstOrEdit, InstRange, Operand, OperandKind, PRegSet,
    RegClass, VReg,
};

use alloc::vec::Vec;
use cranelift_entity::{entity_impl, Keys, PrimaryMap};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt;

/// Index referring to an instruction in VCode.
pub type InsnIndex = regalloc2::Inst;

/// Index referring to a basic block in VCode.
pub type BlockIndex = regalloc2::Block;

/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must be able to emit itself (that is, implement `MachInstEmit`).
pub trait VCodeInst: MachInst + MachInstEmit {}
impl<I: MachInst + MachInstEmit> VCodeInst for I {}

/// A function in "VCode" (virtualized-register code) form, after
/// lowering. This is essentially a standard CFG of basic blocks,
/// where each basic block consists of lowered instructions produced
/// by the machine-specific backend.
///
/// Note that the VCode is immutable once produced, and is not
/// modified by register allocation in particular. Rather, register
/// allocation on the `VCode` produces a separate `regalloc2::Output`
/// struct, and this can be passed to `emit`. `emit` in turn does not
/// modify the vcode, but produces an `EmitResult`, which contains the
/// machine code itself, and the associated disassembly and/or
/// metadata as requested.
pub struct VCode<I: VCodeInst> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// Lowered machine instructions in order corresponding to the original IR.
    insts: Vec<I>,

    /// Operands: pre-regalloc references to virtual registers with
    /// constraints, in one flattened array. This allows the regalloc
    /// to efficiently access all operands without requiring expensive
    /// matches or method invocations on insts.
    operands: Vec<Operand>,

    /// Operand index ranges: for each instruction in `insts`, there
    /// is a tuple here providing the range in `operands` for that
    /// instruction's operands.
    operand_ranges: Vec<(u32, u32)>,

    /// Clobbers: a sparse map from instruction indices to clobber masks.
    clobbers: FxHashMap<InsnIndex, PRegSet>,

    /// Move information: for a given InsnIndex, (src, dst) operand pair.
    is_move: FxHashMap<InsnIndex, (Operand, Operand)>,

    /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
    /// reasonable to keep one of these per instruction.)
    srclocs: Vec<RelSourceLoc>,

    /// Entry block.
    entry: BlockIndex,

    /// Block instruction indices.
    block_ranges: Vec<(InsnIndex, InsnIndex)>,

    /// Block successors: index range in the `block_succs_preds` list.
    block_succ_range: Vec<(u32, u32)>,

    /// Block predecessors: index range in the `block_succs_preds` list.
    block_pred_range: Vec<(u32, u32)>,

    /// Block successor and predecessor lists, concatenated into one
    /// Vec. The `block_succ_range` and `block_pred_range` lists of
    /// tuples above give (start, end) ranges within this list that
    /// correspond to each basic block's successors or predecessors,
    /// respectively.
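    ///
    /// For example, if block 0 has successors [1, 2] and block 1 has
    /// successor [2], the successor portion of this list is `[1, 2, 2]`
    /// and `block_succ_range` is `[(0, 2), (2, 3)]`.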
    block_succs_preds: Vec<regalloc2::Block>,

    /// Block parameters: index range in `block_params` below.
    block_params_range: Vec<(u32, u32)>,

    /// Block parameter lists, concatenated into one vec. The
    /// `block_params_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// blockparam vregs.
    block_params: Vec<regalloc2::VReg>,

    /// Outgoing block arguments on branch instructions, concatenated
    /// into one list.
    ///
    /// Note that this is conceptually a 3D array: we have a VReg list
    /// per block, per successor. We flatten those three dimensions
    /// into this 1D vec, then store index ranges in two levels of
    /// indirection.
    ///
    /// Indexed by the indices in `branch_block_arg_range`.
    branch_block_args: Vec<regalloc2::VReg>,

    /// Array of sequences of (start, end) tuples in
    /// `branch_block_args`, one for each successor; these sequences
    /// for each block are concatenated.
    ///
    /// Indexed by the indices in `branch_block_arg_succ_range`.
    branch_block_arg_range: Vec<(u32, u32)>,

    /// For a given block, indices in `branch_block_arg_range`
    /// corresponding to all of its successors.
    branch_block_arg_succ_range: Vec<(u32, u32)>,

    /// VReg aliases. Each key in this table is translated to its
    /// value when gathering Operands from instructions. Aliases are
    /// not chased transitively (we do not further look up the
    /// translated reg to see if it is another alias).
    ///
    /// We use these aliases to rename an instruction's expected
    /// result vregs to the returned vregs from lowering, which are
    /// usually freshly-allocated temps.
    ///
    /// Operands and branch arguments will already have been
    /// translated through this alias table; but it helps to make
    /// sense of instructions when pretty-printed, for example.
    vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,

    /// Block-order information.
    block_order: BlockLoweringOrder,

    /// ABI object.
    pub(crate) abi: Callee<I::ABIMachineSpec>,

    /// Constant information used during code emission. This should be
    /// immutable across function compilations within the same module.
    emit_info: I::Info,

    /// Reference-typed `regalloc2::VReg`s. The regalloc requires
    /// these in a dense slice (as opposed to querying the
    /// reftype-status of each vreg) for efficient iteration.
    reftyped_vregs: Vec<VReg>,

    /// Constants.
    constants: VCodeConstants,

    /// Value labels for debuginfo attached to vregs.
    debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,

    pub(crate) sigs: SigSet,
}

/// The result of `VCode::emit`. Contains all information computed
/// during emission: actual machine code, optionally a disassembly,
/// and optionally metadata about the code layout.
pub struct EmitResult<I: VCodeInst> {
    /// The MachBuffer containing the machine code.
    pub buffer: MachBuffer<I>,

    /// Offset of each basic block, recorded during emission. Computed
    /// only if `debug_value_labels` is non-empty.
    pub bb_offsets: Vec<CodeOffset>,

    /// Final basic-block edges, in terms of code offsets of
    /// bb-starts. Computed only if `debug_value_labels` is non-empty.
    pub bb_edges: Vec<(CodeOffset, CodeOffset)>,

    /// Final instruction offsets, recorded during emission. Computed
    /// only if `debug_value_labels` is non-empty.
    pub inst_offsets: Vec<CodeOffset>,

    /// Final length of function body.
    pub func_body_len: CodeOffset,

    /// The pretty-printed disassembly, if any. This uses the same
    /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
    /// implementation, but additionally includes the prologue and
    /// epilogue(s), and makes use of the regalloc results.
    pub disasm: Option<String>,

    /// Offsets of sized stackslots.
    pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,

    /// Offsets of dynamic stackslots.
    pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,

    /// Value-labels information (debug metadata).
    pub value_labels_ranges: ValueLabelsRanges,

    /// Stack frame size.
    pub frame_size: u32,

    /// The alignment requirement for pc-relative loads.
    pub alignment: u32,
}

/// A builder for a VCode function body.
///
/// This builder has the ability to accept instructions in either
/// forward or reverse order, depending on the pass direction that
/// produces the VCode. The lowering from CLIF to VCode<MachInst>
/// ordinarily occurs in reverse order (in order to allow instructions
/// to be lowered only if used, and not merged) so a reversal will
/// occur at the end of lowering to ensure the VCode is in machine
/// order.
///
/// If built in reverse, block and instruction indices used once the
/// VCode is built are relative to the final (reversed) order, not the
/// order of construction. Note that this means we do not know the
/// final block or instruction indices when building, so we do not
/// hand them out. (The user is assumed to know them when appending
/// terminator instructions with successor blocks.)
pub struct VCodeBuilder<I: VCodeInst> {
    /// In-progress VCode.
    pub(crate) vcode: VCode<I>,

    /// In what direction is the build occurring?
    direction: VCodeBuildDirection,

    /// Index of the last block-start in the vcode.
    block_start: usize,

    /// Start of succs for the current block in the concatenated succs list.
    succ_start: usize,

    /// Start of blockparams for the current block in the concatenated
    /// blockparams list.
    block_params_start: usize,

    /// Start of successor blockparam arg list entries in
    /// the concatenated branch_block_arg_range list.
    branch_block_arg_succ_start: usize,

    /// Current source location.
    cur_srcloc: RelSourceLoc,

    /// Debug-value label in-progress map, keyed by label. For each
    /// label, we keep disjoint ranges mapping to vregs. We'll flatten
    /// this into (vreg, range, label) tuples when done.
    debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
}

/// Direction in which a VCodeBuilder builds VCode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VCodeBuildDirection {
    // TODO: add `Forward` once we need it and can test it adequately.
    /// Backward-build pass: we expect the producer to call `emit()`
    /// with instructions in reverse program order within each block.
    Backward,
}
impl<I: VCodeInst> VCodeBuilder<I> {
    /// Create a new VCodeBuilder.
    pub fn new(
        sigs: SigSet,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        constants: VCodeConstants,
        direction: VCodeBuildDirection,
    ) -> VCodeBuilder<I> {
        let vcode = VCode::new(sigs, abi, emit_info, block_order, constants);

        VCodeBuilder {
            vcode,
            direction,
            block_start: 0,
            succ_start: 0,
            block_params_start: 0,
            branch_block_arg_succ_start: 0,
            cur_srcloc: Default::default(),
            debug_info: FxHashMap::default(),
        }
    }

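    /// Initialize the ABI object, passing it the vreg temporaries allocated
    /// for its use during lowering.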
    pub fn init_abi(&mut self, temps: Vec<Writable<Reg>>) {
        self.vcode.abi.init(&self.vcode.sigs, temps);
    }

    /// Access the ABI object.
    pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
        &self.vcode.abi
    }

    /// Access the ABI object mutably.
    pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
        &mut self.vcode.abi
    }

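    /// Access the set of ABI signatures used by this function and its callees.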
    pub fn sigs(&self) -> &SigSet {
        &self.vcode.sigs
    }

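    /// Mutably access the set of ABI signatures used by this function and its callees.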
    pub fn sigs_mut(&mut self) -> &mut SigSet {
        &mut self.vcode.sigs
    }

    /// Access to the BlockLoweringOrder object.
    pub fn block_order(&self) -> &BlockLoweringOrder {
        &self.vcode.block_order
    }

    /// Set the current block as the entry block.
    pub fn set_entry(&mut self, block: BlockIndex) {
        self.vcode.entry = block;
    }

    /// End the current basic block. Must be called after emitting vcode insts
    /// for IR insts and prior to ending the function (building the VCode).
    pub fn end_bb(&mut self) {
        let start_idx = self.block_start;
        let end_idx = self.vcode.insts.len();
        self.block_start = end_idx;
        // Add the instruction index range to the list of blocks.
        self.vcode
            .block_ranges
            .push((InsnIndex::new(start_idx), InsnIndex::new(end_idx)));
        // End the successors list.
        let succ_end = self.vcode.block_succs_preds.len();
        self.vcode
            .block_succ_range
            .push((self.succ_start as u32, succ_end as u32));
        self.succ_start = succ_end;
        // End the blockparams list.
        let block_params_end = self.vcode.block_params.len();
        self.vcode
            .block_params_range
            .push((self.block_params_start as u32, block_params_end as u32));
        self.block_params_start = block_params_end;
        // End the branch blockparam args list.
        let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
        self.vcode.branch_block_arg_succ_range.push((
            self.branch_block_arg_succ_start as u32,
            branch_block_arg_succ_end as u32,
        ));
        self.branch_block_arg_succ_start = branch_block_arg_succ_end;
    }

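    /// Add a block parameter (incoming blockparam vreg) to the current basic block.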
    pub fn add_block_param(&mut self, param: VirtualReg) {
        self.vcode.block_params.push(param.into());
    }

    fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
        let start = self.vcode.branch_block_args.len();
        self.vcode
            .branch_block_args
            .extend(args.iter().map(|&arg| VReg::from(arg)));
        let end = self.vcode.branch_block_args.len();
        self.vcode
            .branch_block_arg_range
            .push((start as u32, end as u32));
    }

    /// Push an instruction for the current BB and current IR inst
    /// within the BB.
    pub fn push(&mut self, insn: I) {
        self.vcode.insts.push(insn);
        self.vcode.srclocs.push(self.cur_srcloc);
    }

    /// Add a successor block with branch args.
    pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
        self.vcode.block_succs_preds.push(block);
        self.add_branch_args_for_succ(args);
    }

    /// Set the current source location.
    pub fn set_srcloc(&mut self, srcloc: RelSourceLoc) {
        self.cur_srcloc = srcloc;
    }

    /// Add a debug value label to a register.
    pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
        // We'll fix up labels in `reverse_and_finalize()`. Because we're
        // generating code bottom-to-top, the liverange of the label goes
        // *from* the last index at which it was defined (or 0, which is
        // the end of the eventual function) *to* just this instruction,
        // and no further.
        let inst = InsnIndex::new(self.vcode.insts.len());
        let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
        let last = labels
            .last()
            .map(|(_start, end, _vreg)| *end)
            .unwrap_or(InsnIndex::new(0));
        labels.push((last, inst, reg.into()));
    }

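    /// Add a vreg alias: references to `from` are rewritten to the
    /// (already alias-resolved) `to` when operands are collected. Aliases
    /// must not form cycles.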
    pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
        let from = from.into();
        let resolved_to = self.resolve_vreg_alias(to.into());
        // Disallow cycles (see below).
        assert_ne!(resolved_to, from);
        self.vcode.vreg_aliases.insert(from, resolved_to);
    }

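    /// Resolve a vreg through the alias table: return the vreg it should be
    /// replaced with, or `from` itself if it has no alias.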
    pub fn resolve_vreg_alias(&self, from: regalloc2::VReg) -> regalloc2::VReg {
        Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, from)
    }

    fn resolve_vreg_alias_impl(
        aliases: &FxHashMap<regalloc2::VReg, regalloc2::VReg>,
        from: regalloc2::VReg,
    ) -> regalloc2::VReg {
        // We prevent cycles from existing by resolving targets of
        // aliases eagerly before setting them. If the target resolves
        // to the origin of the alias, then a cycle would be created
        // and the alias is disallowed. Because of the structure of
        // SSA code (one instruction can refer to another's defs but
        // not vice-versa, except indirectly through
        // phis/blockparams), cycles should not occur as we use
        // aliases to redirect vregs to the temps that actually define
        // them.

        let mut vreg = from;
        while let Some(to) = aliases.get(&vreg) {
            vreg = *to;
        }
        vreg
    }

    /// Access the constants.
    pub fn constants(&mut self) -> &mut VCodeConstants {
        &mut self.vcode.constants
    }

    fn compute_preds_from_succs(&mut self) {
        // Compute predecessors from successors. In order to gather
        // all preds for a block into a contiguous sequence, we build
        // a list of (succ, pred) tuples and then sort.
        let mut succ_pred_edges: Vec<(BlockIndex, BlockIndex)> =
            Vec::with_capacity(self.vcode.block_succs_preds.len());
        for (pred, &(start, end)) in self.vcode.block_succ_range.iter().enumerate() {
            let pred = BlockIndex::new(pred);
            for i in start..end {
                let succ = BlockIndex::new(self.vcode.block_succs_preds[i as usize].index());
                succ_pred_edges.push((succ, pred));
            }
        }
        succ_pred_edges.sort_unstable();

        let mut i = 0;
        for succ in 0..self.vcode.num_blocks() {
            let succ = BlockIndex::new(succ);
            let start = self.vcode.block_succs_preds.len();
            while i < succ_pred_edges.len() && succ_pred_edges[i].0 == succ {
                let pred = succ_pred_edges[i].1;
                self.vcode.block_succs_preds.push(pred);
                i += 1;
            }
            let end = self.vcode.block_succs_preds.len();
            self.vcode.block_pred_range.push((start as u32, end as u32));
        }
    }

    /// Called once, when a build in Backward order is complete, to
    /// perform the overall reversal (into final forward order) and
    /// finalize metadata accordingly.
    fn reverse_and_finalize(&mut self) {
        let n_insts = self.vcode.insts.len();
        if n_insts == 0 {
            return;
        }

        // Reverse the per-block and per-inst sequences.
        self.vcode.block_ranges.reverse();
        // block_params_range is indexed by block (and blocks were
        // traversed in reverse) so we reverse it; but block-param
        // sequences in the concatenated vec can remain in reverse
        // order (it is effectively an arena of arbitrarily-placed
        // referenced sequences).
        self.vcode.block_params_range.reverse();
        // Likewise, we reverse block_succ_range, but the block_succ
        // concatenated array can remain as-is.
        self.vcode.block_succ_range.reverse();
        self.vcode.insts.reverse();
        self.vcode.srclocs.reverse();
        // Likewise, branch_block_arg_succ_range is indexed by block
        // so must be reversed.
        self.vcode.branch_block_arg_succ_range.reverse();

        // To translate an instruction index *endpoint* in reversed
        // order to forward order, compute `n_insts - i`.
        //
        // Why not `n_insts - 1 - i`? That would be correct to
        // translate an individual instruction index (for ten insts 0
        // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
        // 0). But for the usual inclusive-start, exclusive-end range
        // idiom, inclusive starts become exclusive ends and
        // vice-versa, so e.g. an (inclusive) start of 0 becomes an
        // (exclusive) end of 10.
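        //
        // As a concrete example with `n_insts == 10`: the reversed-order
        // range (2, 5), covering reversed insts 2, 3 and 4 (forward insts
        // 7, 6 and 5), becomes the forward-order range (5, 8).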
        let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());

        // Edit the block-range instruction indices.
        for tuple in &mut self.vcode.block_ranges {
            let (start, end) = *tuple;
            *tuple = (translate(end), translate(start)); // Note reversed order.
        }

        // Generate debug-value labels based on per-label maps.
        for (label, tuples) in &self.debug_info {
            for &(start, end, vreg) in tuples {
                let vreg = self.resolve_vreg_alias(vreg);
                let fwd_start = translate(end);
                let fwd_end = translate(start);
                self.vcode
                    .debug_value_labels
                    .push((vreg, fwd_start, fwd_end, label.as_u32()));
            }
        }

        // Now sort debug value labels by VReg, as required
        // by regalloc2.
        self.vcode
            .debug_value_labels
            .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
    }

    fn collect_operands(&mut self, allocatable: PRegSet) {
        for (i, insn) in self.vcode.insts.iter().enumerate() {
            // Push operands from the instruction onto the operand list.
            //
            // We rename through the vreg alias table as we collect
            // the operands. This is better than a separate post-pass
            // over operands, because it has more cache locality:
            // operands only need to pass through L1 once. This is
            // also better than renaming instructions'
            // operands/registers while lowering, because here we only
            // need to do the `match` over the instruction to visit
            // its register fields (which is slow, branchy code) once.

            let vreg_aliases = &self.vcode.vreg_aliases;
            let mut op_collector =
                OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
                    Self::resolve_vreg_alias_impl(vreg_aliases, vreg)
                });
            insn.get_operands(&mut op_collector);
            let (ops, clobbers) = op_collector.finish();
            self.vcode.operand_ranges.push(ops);

            if clobbers != PRegSet::default() {
                self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
            }

            if let Some((dst, src)) = insn.is_move() {
                // We should never see non-virtual registers present in move
                // instructions.
                assert!(
                    src.is_virtual(),
                    "the real register {:?} was used as the source of a move instruction",
                    src
                );
                assert!(
                    dst.to_reg().is_virtual(),
                    "the real register {:?} was used as the destination of a move instruction",
                    dst.to_reg()
                );

                let src = Operand::reg_use(Self::resolve_vreg_alias_impl(vreg_aliases, src.into()));
                let dst = Operand::reg_def(Self::resolve_vreg_alias_impl(
                    vreg_aliases,
                    dst.to_reg().into(),
                ));

                // Note that regalloc2 requires these in (src, dst) order.
                self.vcode.is_move.insert(InsnIndex::new(i), (src, dst));
            }
        }

        // Translate blockparam args via the vreg aliases table as well.
        for arg in &mut self.vcode.branch_block_args {
            let new_arg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *arg);
            trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
            *arg = new_arg;
        }
    }

    /// Build the final VCode.
    pub fn build(mut self, allocatable: PRegSet, vregs: VRegAllocator<I>) -> VCode<I> {
        self.vcode.vreg_types = vregs.vreg_types;
        self.vcode.reftyped_vregs = vregs.reftyped_vregs;

        if self.direction == VCodeBuildDirection::Backward {
            self.reverse_and_finalize();
        }
        self.collect_operands(allocatable);

        // Apply register aliases to the `reftyped_vregs` list since this list
        // will be returned directly to `regalloc2` eventually and all
        // operands/results of instructions will use the alias-resolved vregs
        // from `regalloc2`'s perspective.
        //
        // Also note that `reftyped_vregs` can't have duplicates, so after the
        // aliases are applied duplicates are removed.
        for reg in self.vcode.reftyped_vregs.iter_mut() {
            *reg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *reg);
        }
        self.vcode.reftyped_vregs.sort();
        self.vcode.reftyped_vregs.dedup();

        self.compute_preds_from_succs();
        self.vcode.debug_value_labels.sort_unstable();
        self.vcode
    }
}

/// Is this type a reference type?
fn is_reftype(ty: Type) -> bool {
    ty == types::R64 || ty == types::R32
}

impl<I: VCodeInst> VCode<I> {
    /// New empty VCode.
    fn new(
        sigs: SigSet,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        constants: VCodeConstants,
    ) -> VCode<I> {
        let n_blocks = block_order.lowered_order().len();
        VCode {
            sigs,
            vreg_types: vec![],
            insts: Vec::with_capacity(10 * n_blocks),
            operands: Vec::with_capacity(30 * n_blocks),
            operand_ranges: Vec::with_capacity(10 * n_blocks),
            clobbers: FxHashMap::default(),
            is_move: FxHashMap::default(),
            srclocs: Vec::with_capacity(10 * n_blocks),
            entry: BlockIndex::new(0),
            block_ranges: Vec::with_capacity(n_blocks),
            block_succ_range: Vec::with_capacity(n_blocks),
            block_succs_preds: Vec::with_capacity(2 * n_blocks),
            block_pred_range: Vec::with_capacity(n_blocks),
            block_params_range: Vec::with_capacity(n_blocks),
            block_params: Vec::with_capacity(5 * n_blocks),
            branch_block_args: Vec::with_capacity(10 * n_blocks),
            branch_block_arg_range: Vec::with_capacity(2 * n_blocks),
            branch_block_arg_succ_range: Vec::with_capacity(n_blocks),
            block_order,
            abi,
            emit_info,
            reftyped_vregs: vec![],
            constants,
            debug_value_labels: vec![],
            vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()),
        }
    }

    /// Get the number of blocks. Block indices will be in the range `0 ..
    /// (self.num_blocks() - 1)`.
    pub fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    /// The number of lowered instructions.
    pub fn num_insts(&self) -> usize {
        self.insts.len()
    }

    /// Get the successors for a block.
    pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
        let (start, end) = self.block_succ_range[block.index()];
        &self.block_succs_preds[start as usize..end as usize]
    }

    fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
        // Compute clobbered registers.
        let mut clobbered = vec![];
        let mut clobbered_set = FxHashSet::default();

        // All moves are included in clobbers.
        for edit in &regalloc.edits {
            let Edit::Move { to, .. } = edit.1;
            if let Some(preg) = to.as_reg() {
                let reg = RealReg::from(preg);
                if clobbered_set.insert(reg) {
                    clobbered.push(Writable::from_reg(reg));
                }
            }
        }

        for (i, (start, end)) in self.operand_ranges.iter().enumerate() {
            // Skip this instruction if not "included in clobbers" as
            // per the MachInst. (Some backends use this to implement
            // ABI specifics; e.g., excluding calls of the same ABI as
            // the current function from clobbers, because by
            // definition everything clobbered by the call can be
            // clobbered by this function without saving as well.)
            if !self.insts[i].is_included_in_clobbers() {
                continue;
            }

            let start = *start as usize;
            let end = *end as usize;
            let operands = &self.operands[start..end];
            let allocs = &regalloc.allocs[start..end];
            for (operand, alloc) in operands.iter().zip(allocs.iter()) {
                // We're interested only in writes (Mods or Defs).
                if operand.kind() == OperandKind::Use {
                    continue;
                }
                if let Some(preg) = alloc.as_reg() {
                    let reg = RealReg::from(preg);
                    if clobbered_set.insert(reg) {
                        clobbered.push(Writable::from_reg(reg));
                    }
                }
            }

            // Also add explicitly-clobbered registers.
            for preg in self
                .clobbers
                .get(&InsnIndex::new(i))
                .cloned()
                .unwrap_or_default()
            {
                let reg = RealReg::from(preg);
                if clobbered_set.insert(reg) {
                    clobbered.push(Writable::from_reg(reg));
                }
            }
        }

        clobbered
    }
753
754 /// Emit the instructions to a `MachBuffer`, containing fixed-up
755 /// code and external reloc/trap/etc. records ready for use. Takes
756 /// the regalloc results as well.
757 ///
758 /// Returns the machine code itself, and optionally metadata
759 /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
760 /// is consumed by the emission process.
761 pub fn emit(
762 mut self,
        regalloc: &regalloc2::Output,
764 want_disasm: bool,
765 want_metadata: bool,
766 ) -> EmitResult<I>
767 where
768 I: VCodeInst,
769 {
770 // To write into disasm string.
771 use core::fmt::Write;
772
773 let _tt = timing::vcode_emit();
774 let mut buffer = MachBuffer::new();
775 let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
776
777 // The first M MachLabels are reserved for block indices, the next N MachLabels for
778 // constants.
779 buffer.reserve_labels_for_blocks(self.num_blocks());
780 buffer.reserve_labels_for_constants(&self.constants);
781
782 // Construct the final order we emit code in: cold blocks at the end.
783 let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
784 let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
785 for block in 0..self.num_blocks() {
786 let block = BlockIndex::new(block);
787 if self.block_order.is_cold(block) {
788 cold_blocks.push(block);
789 } else {
790 final_order.push(block);
791 }
792 }
793 final_order.extend(cold_blocks.clone());
794
795 // Compute/save info we need for the prologue: clobbers and
796 // number of spillslots.
797 //
798 // We clone `abi` here because we will mutate it as we
799 // generate the prologue and set other info, but we can't
800 // mutate `VCode`. The info it usually carries prior to
801 // setting clobbers is fairly minimal so this should be
802 // relatively cheap.
803 let clobbers = self.compute_clobbers(regalloc);
804 self.abi.set_num_spillslots(regalloc.num_spillslots);
805 self.abi.set_clobbered(clobbers);
806
807 // We need to generate the prologue in order to get the ABI
808 // object into the right state first. We'll emit it when we
809 // hit the right block below.
810 let prologue_insts = self.abi.gen_prologue(&self.sigs);
811
812 // Emit blocks.
813 let mut cur_srcloc = None;
814 let mut last_offset = None;
815 let mut inst_offsets = vec![];
816 let mut state = I::State::new(&self.abi);
817
818 let mut disasm = String::new();
819
820 if !self.debug_value_labels.is_empty() {
821 inst_offsets.resize(self.insts.len(), 0);
822 }
823
824 // Count edits per block ahead of time; this is needed for
825 // lookahead island emission. (We could derive it per-block
826 // with binary search in the edit list, but it's more
827 // efficient to do it in one pass here.)
828 let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
829 let mut edit_idx = 0;
830 for block in 0..self.num_blocks() {
831 let end_inst = self.block_ranges[block].1;
832 let start_edit_idx = edit_idx;
833 while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
834 edit_idx += 1;
835 }
836 let end_edit_idx = edit_idx;
837 ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
838 }
839
840 let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
841
842 for (block_order_idx, &block) in final_order.iter().enumerate() {
843 trace!("emitting block {:?}", block);
844 let new_offset = I::align_basic_block(buffer.cur_offset());
845 while new_offset > buffer.cur_offset() {
846 // Pad with NOPs up to the aligned block offset.
847 let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
848 nop.emit(&[], &mut buffer, &self.emit_info, &mut Default::default());
849 }
850 assert_eq!(buffer.cur_offset(), new_offset);
851
852 let do_emit = |inst: &I,
853 allocs: &[Allocation],
854 disasm: &mut String,
855 buffer: &mut MachBuffer<I>,
856 state: &mut I::State| {
857 if want_disasm && !inst.is_args() {
858 let mut s = state.clone();
859 writeln!(disasm, " {}", inst.pretty_print_inst(allocs, &mut s)).unwrap();
860 }
861 inst.emit(allocs, buffer, &self.emit_info, state);
862 };
863
864 // Is this the first block? Emit the prologue directly if so.
865 if block == self.entry {
866 trace!(" -> entry block");
867 buffer.start_srcloc(Default::default());
868 state.pre_sourceloc(Default::default());
869 for inst in &prologue_insts {
870 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state);
871 }
872 buffer.end_srcloc();
873 }
874
875 // Now emit the regular block body.
876
877 buffer.bind_label(MachLabel::from_block(block));
878
879 if want_disasm {
880 writeln!(&mut disasm, "block{}:", block.index()).unwrap();
881 }
882
883 if want_metadata {
884 // Track BB starts. If we have backed up due to MachBuffer
885 // branch opts, note that the removed blocks were removed.
886 let cur_offset = buffer.cur_offset();
887 if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
888 for i in (0..bb_starts.len()).rev() {
889 if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
890 break;
891 }
892 bb_starts[i] = None;
893 }
894 }
895 bb_starts.push(Some(cur_offset));
896 last_offset = Some(cur_offset);
897 }
898
899 if let Some(block_start) = I::gen_block_start(
900 self.block_order.is_indirect_branch_target(block),
901 is_forward_edge_cfi_enabled,
902 ) {
903 do_emit(&block_start, &[], &mut disasm, &mut buffer, &mut state);
904 }
905
906 for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
907 match inst_or_edit {
908 InstOrEdit::Inst(iix) => {
909 if !self.debug_value_labels.is_empty() {
910 // If we need to produce debug info,
911 // record the offset of each instruction
912 // so that we can translate value-label
913 // ranges to machine-code offsets.
914
915 // Cold blocks violate monotonicity
916 // assumptions elsewhere (that
917 // instructions in inst-index order are in
918 // order in machine code), so we omit
919 // their offsets here. Value-label range
920 // generation below will skip empty ranges
921 // and ranges with to-offsets of zero.
922 if !self.block_order.is_cold(block) {
923 inst_offsets[iix.index()] = buffer.cur_offset();
924 }
925 }
926
927 if self.insts[iix.index()].is_move().is_some() {
928 // Skip moves in the pre-regalloc program;
929 // all of these are incorporated by the
930 // regalloc into its unified move handling
931 // and they come out the other end, if
932 // still needed (not elided), as
933 // regalloc-inserted moves.
934 continue;
935 }
936
937 // Update the srcloc at this point in the buffer.
938 let srcloc = self.srclocs[iix.index()];
939 if cur_srcloc != Some(srcloc) {
940 if cur_srcloc.is_some() {
941 buffer.end_srcloc();
942 }
943 buffer.start_srcloc(srcloc);
944 cur_srcloc = Some(srcloc);
945 }
946 state.pre_sourceloc(cur_srcloc.unwrap_or_default());
947
948 // If this is a safepoint, compute a stack map
949 // and pass it to the emit state.
950 if self.insts[iix.index()].is_safepoint() {
951 let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![];
952 // Find the contiguous range of
953 // (progpoint, allocation) safepoint slot
954 // records in `regalloc.safepoint_slots`
955 // for this instruction index.
956 let safepoint_slots_start = regalloc
957 .safepoint_slots
958 .binary_search_by(|(progpoint, _alloc)| {
959 if progpoint.inst() >= iix {
960 std::cmp::Ordering::Greater
961 } else {
962 std::cmp::Ordering::Less
963 }
964 })
965 .unwrap_err();
966
967 for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..]
968 .iter()
969 .take_while(|(progpoint, _)| progpoint.inst() == iix)
970 {
971 let slot = alloc.as_stack().unwrap();
972 safepoint_slots.push(slot);
973 }
974 if !safepoint_slots.is_empty() {
975 let stack_map = self
976 .abi
977 .spillslots_to_stack_map(&safepoint_slots[..], &state);
978 state.pre_safepoint(stack_map);
979 }
980 }
981
982 // Get the allocations for this inst from the regalloc result.
983 let allocs = regalloc.inst_allocs(iix);
984
985 // If the instruction we are about to emit is
986 // a return, place an epilogue at this point
987 // (and don't emit the return; the actual
988 // epilogue will contain it).
989 if self.insts[iix.index()].is_term() == MachTerminator::Ret {
990 for inst in self.abi.gen_epilogue() {
991 do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state);
992 }
993 } else {
994 // Emit the instruction!
995 do_emit(
996 &self.insts[iix.index()],
997 allocs,
998 &mut disasm,
999 &mut buffer,
1000 &mut state,
1001 );
1002 }
1003 }
1004
1005 InstOrEdit::Edit(Edit::Move { from, to }) => {
1006 // Create a move/spill/reload instruction and
1007 // immediately emit it.
1008 match (from.as_reg(), to.as_reg()) {
1009 (Some(from), Some(to)) => {
1010 // Reg-to-reg move.
1011 let from_rreg = Reg::from(from);
1012 let to_rreg = Writable::from_reg(Reg::from(to));
1013 debug_assert_eq!(from.class(), to.class());
1014 let ty = I::canonical_type_for_rc(from.class());
1015 let mv = I::gen_move(to_rreg, from_rreg, ty);
1016 do_emit(&mv, &[], &mut disasm, &mut buffer, &mut state);
1017 }
1018 (Some(from), None) => {
1019 // Spill from register to spillslot.
1020 let to = to.as_stack().unwrap();
1021 let from_rreg = RealReg::from(from);
1022 let spill = self.abi.gen_spill(to, from_rreg);
1023 do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state);
1024 }
1025 (None, Some(to)) => {
1026 // Load from spillslot to register.
1027 let from = from.as_stack().unwrap();
1028 let to_rreg = Writable::from_reg(RealReg::from(to));
1029 let reload = self.abi.gen_reload(to_rreg, from);
1030 do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state);
1031 }
1032 (None, None) => {
1033 panic!("regalloc2 should have eliminated stack-to-stack moves!");
1034 }
1035 }
1036 }
1037 }
1038 }
1039
1040 if cur_srcloc.is_some() {
1041 buffer.end_srcloc();
1042 cur_srcloc = None;
1043 }
1044
1045 // Do we need an island? Get the worst-case size of the
1046 // next BB and see if, having emitted that many bytes, we
1047 // will be beyond the deadline.
1048 if block_order_idx < final_order.len() - 1 {
1049 let next_block = final_order[block_order_idx + 1];
1050 let next_block_range = self.block_ranges[next_block.index()];
1051 let next_block_size =
1052 (next_block_range.1.index() - next_block_range.0.index()) as u32;
1053 let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
1054 let worst_case_next_bb =
1055 I::worst_case_size() * (next_block_size + next_block_ra_insertions);
1056 if buffer.island_needed(worst_case_next_bb) {
1057 buffer.emit_island(worst_case_next_bb);
1058 }
1059 }
1060 }
1061
1062 // Emit the constants used by the function.
1063 let mut alignment = 1;
1064 for (constant, data) in self.constants.iter() {
1065 alignment = data.alignment().max(alignment);
1066
1067 let label = buffer.get_label_for_constant(constant);
1068 buffer.defer_constant(label, data.alignment(), data.as_slice(), u32::max_value());
1069 }
1070
1071 let func_body_len = buffer.cur_offset();
1072
1073 // Create `bb_edges` and final (filtered) `bb_starts`.
1074 let mut bb_edges = vec![];
1075 let mut bb_offsets = vec![];
1076 if want_metadata {
1077 for block in 0..self.num_blocks() {
1078 if bb_starts[block].is_none() {
1079 // Block was deleted by MachBuffer; skip.
1080 continue;
1081 }
1082 let from = bb_starts[block].unwrap();
1083
1084 bb_offsets.push(from);
1085 // Resolve each `succ` label and add edges.
1086 let succs = self.block_succs(BlockIndex::new(block));
1087 for &succ in succs.iter() {
1088 let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1089 bb_edges.push((from, to));
1090 }
1091 }
1092 }
1093
1094 let value_labels_ranges =
1095 self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1096 let frame_size = self.abi.frame_size();
1097
1098 EmitResult {
1099 buffer,
1100 bb_offsets,
1101 bb_edges,
1102 inst_offsets,
1103 func_body_len,
1104 disasm: if want_disasm { Some(disasm) } else { None },
1105 sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
1106 dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
1107 value_labels_ranges,
1108 frame_size,
1109 alignment,
1110 }
1111 }
1112
1113 fn compute_value_labels_ranges(
1114 &self,
        regalloc: &regalloc2::Output,
1116 inst_offsets: &[CodeOffset],
1117 func_body_len: u32,
1118 ) -> ValueLabelsRanges {
1119 if self.debug_value_labels.is_empty() {
1120 return ValueLabelsRanges::default();
1121 }
1122
1123 let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
        for &(label, from, to, alloc) in &regalloc.debug_locations {
1125 let ranges = value_labels_ranges
1126 .entry(ValueLabel::from_u32(label))
1127 .or_insert_with(|| vec![]);
1128 let from_offset = inst_offsets[from.inst().index()];
1129 let to_offset = if to.inst().index() == inst_offsets.len() {
1130 func_body_len
1131 } else {
1132 inst_offsets[to.inst().index()]
1133 };
1134
1135 // Empty range or to-offset of zero can happen because of
1136 // cold blocks (see above).
1137 if to_offset == 0 || from_offset == to_offset {
1138 continue;
1139 }
1140
1141 let loc = if let Some(preg) = alloc.as_reg() {
1142 LabelValueLoc::Reg(Reg::from(preg))
1143 } else {
1144 // We can't translate spillslot locations at the
1145 // moment because ValueLabelLoc requires an
1146 // instantaneous SP offset, and this can *change*
1147 // within the range we have here because of callsites
1148 // adjusting SP temporarily. To avoid the complexity
1149 // of accurately plumbing through nominal-SP
1150 // adjustment sites, we just omit debug info for
1151 // values that are spilled. Not ideal, but debug info
1152 // is best-effort.
1153 continue;
1154 };
1155
1156 ranges.push(ValueLocRange {
1157 loc,
1158 // ValueLocRanges are recorded by *instruction-end
1159 // offset*. `from_offset` is the *start* of the
1160 // instruction; that is the same as the end of another
1161 // instruction, so we only want to begin coverage once
1162 // we are past the previous instruction's end.
1163 start: from_offset + 1,
1164 // Likewise, `end` is exclusive, but we want to
1165 // *include* the end of the last
1166 // instruction. `to_offset` is the start of the
1167 // `to`-instruction, which is the exclusive end, i.e.,
1168 // the first instruction not covered. That
1169 // instruction's start is the same as the end of the
1170 // last instruction that is included, so we go one
1171 // byte further to be sure to include it.
1172 end: to_offset + 1,
1173 });
1174 }
1175
1176 value_labels_ranges
1177 }
1178
1179 /// Get the IR block for a BlockIndex, if one exists.
1180 pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1181 self.block_order.lowered_order()[block.index()].orig_block()
1182 }
1183
1184 #[inline]
1185 fn assert_no_vreg_aliases<'a>(&self, list: &'a [VReg]) -> &'a [VReg] {
1186 for vreg in list {
1187 self.assert_not_vreg_alias(*vreg);
1188 }
1189 list
1190 }
1191
1192 #[inline]
1193 fn assert_not_vreg_alias(&self, vreg: VReg) -> VReg {
1194 debug_assert!(VCodeBuilder::<I>::resolve_vreg_alias_impl(&self.vreg_aliases, vreg) == vreg);
1195 vreg
1196 }
1197
1198 #[inline]
1199 fn assert_operand_not_vreg_alias(&self, op: Operand) -> Operand {
1200 // It should be true by construction that `Operand`s do not contain any
1201 // aliased vregs since they're all collected and mapped when the VCode
1202 // is itself constructed.
1203 self.assert_not_vreg_alias(op.vreg());
1204 op
1205 }
1206}
1207
1208impl<I: VCodeInst> RegallocFunction for VCode<I> {
1209 fn num_insts(&self) -> usize {
1210 self.insts.len()
1211 }
1212
1213 fn num_blocks(&self) -> usize {
1214 self.block_ranges.len()
1215 }
1216
1217 fn entry_block(&self) -> BlockIndex {
1218 self.entry
1219 }
1220
1221 fn block_insns(&self, block: BlockIndex) -> InstRange {
1222 let (start, end) = self.block_ranges[block.index()];
1223 InstRange::forward(start, end)
1224 }
1225
1226 fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1227 let (start, end) = self.block_succ_range[block.index()];
1228 &self.block_succs_preds[start as usize..end as usize]
1229 }
1230
1231 fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1232 let (start, end) = self.block_pred_range[block.index()];
1233 &self.block_succs_preds[start as usize..end as usize]
1234 }
1235
1236 fn block_params(&self, block: BlockIndex) -> &[VReg] {
1237 // As a special case we don't return block params for the entry block, as all the arguments
1238 // will be defined by the `Inst::Args` instruction.
1239 if block == self.entry {
1240 return &[];
1241 }
1242
1243 let (start, end) = self.block_params_range[block.index()];
1244 let ret = &self.block_params[start as usize..end as usize];
1245 // Currently block params are never aliased to another vreg, but
1246 // double-check just to be sure.
1247 self.assert_no_vreg_aliases(ret)
1248 }
1249
1250 fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1251 let (succ_range_start, succ_range_end) = self.branch_block_arg_succ_range[block.index()];
1252 let succ_ranges =
1253 &self.branch_block_arg_range[succ_range_start as usize..succ_range_end as usize];
1254 let (branch_block_args_start, branch_block_args_end) = succ_ranges[succ_idx];
1255 let ret = &self.branch_block_args
1256 [branch_block_args_start as usize..branch_block_args_end as usize];
1257 self.assert_no_vreg_aliases(ret)
1258 }
1259
1260 fn is_ret(&self, insn: InsnIndex) -> bool {
1261 match self.insts[insn.index()].is_term() {
1262 // We treat blocks terminated by an unconditional trap like a return for regalloc.
1263 MachTerminator::None => self.insts[insn.index()].is_trap(),
1264 MachTerminator::Ret => true,
1265 _ => false,
1266 }
1267 }
1268
1269 fn is_branch(&self, insn: InsnIndex) -> bool {
1270 match self.insts[insn.index()].is_term() {
1271 MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
1272 _ => false,
1273 }
1274 }
1275
1276 fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool {
1277 self.insts[insn.index()].is_safepoint()
1278 }
1279
1280 fn is_move(&self, insn: InsnIndex) -> Option<(Operand, Operand)> {
1281 let (a, b) = self.is_move.get(&insn)?;
1282 Some((
1283 self.assert_operand_not_vreg_alias(*a),
1284 self.assert_operand_not_vreg_alias(*b),
1285 ))
1286 }
1287
1288 fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1289 let (start, end) = self.operand_ranges[insn.index()];
1290 let ret = &self.operands[start as usize..end as usize];
1291 for op in ret {
1292 self.assert_operand_not_vreg_alias(*op);
1293 }
1294 ret
1295 }
1296
1297 fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1298 self.clobbers.get(&insn).cloned().unwrap_or_default()
1299 }
1300
1301 fn num_vregs(&self) -> usize {
1302 std::cmp::max(self.vreg_types.len(), first_user_vreg_index())
1303 }
1304
1305 fn reftype_vregs(&self) -> &[VReg] {
1306 self.assert_no_vreg_aliases(&self.reftyped_vregs[..])
1307 }
1308
1309 fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1310 // VRegs here are inserted into `debug_value_labels` after code is
1311 // generated and aliases are fully defined, so no double-check that
1312 // aliases are not lingering.
1313 for (vreg, ..) in self.debug_value_labels.iter() {
1314 self.assert_not_vreg_alias(*vreg);
1315 }
1316 &self.debug_value_labels[..]
1317 }
1318
1319 fn spillslot_size(&self, regclass: RegClass) -> usize {
1320 self.abi.get_spillslot_size(regclass) as usize
1321 }
1322
1323 fn allow_multiple_vreg_defs(&self) -> bool {
1324 // At least the s390x backend requires this, because the
1325 // `Loop` pseudo-instruction aggregates all Operands so pinned
1326 // vregs (RealRegs) may occur more than once.
1327 true
1328 }
1329}
1330
1331impl<I: VCodeInst> fmt::Debug for VCode<I> {
1332 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1333 writeln!(f, "VCode {{")?;
1334 writeln!(f, " Entry block: {}", self.entry.index())?;
1335
1336 let mut state = Default::default();
1337
1338 let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
1339 alias_keys.sort_unstable();
1340 for key in alias_keys {
1341 let dest = self.vreg_aliases.get(&key).unwrap();
1342 writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
1343 }
1344
1345 for block in 0..self.num_blocks() {
1346 let block = BlockIndex::new(block);
1347 writeln!(f, "Block {}:", block.index())?;
1348 if let Some(bb) = self.bindex_to_bb(block) {
1349 writeln!(f, " (original IR block: {})", bb)?;
1350 }
1351 for succ in self.succs(block) {
1352 writeln!(f, " (successor: Block {})", succ.index())?;
1353 }
1354 let (start, end) = self.block_ranges[block.index()];
1355 writeln!(
1356 f,
1357 " (instruction range: {} .. {})",
1358 start.index(),
1359 end.index()
1360 )?;
1361 for inst in start.index()..end.index() {
1362 writeln!(
1363 f,
1364 " Inst {}: {}",
1365 inst,
1366 self.insts[inst].pretty_print_inst(&[], &mut state)
1367 )?;
1368 }
1369 }
1370
1371 writeln!(f, "}}")?;
1372 Ok(())
1373 }
1374}

/// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
pub struct VRegAllocator<I> {
    /// Next virtual register number to allocate.
    next_vreg: usize,

    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// A set with the same contents as `reftyped_vregs`, in order to
    /// avoid inserting more than once.
    reftyped_vregs_set: FxHashSet<VReg>,

    /// Reference-typed `regalloc2::VReg`s. The regalloc requires
    /// these in a dense slice (as opposed to querying the
    /// reftype-status of each vreg) for efficient iteration.
    reftyped_vregs: Vec<VReg>,

    /// The type of instruction that this allocator makes registers for.
    _inst: core::marker::PhantomData<I>,
}

impl<I: VCodeInst> VRegAllocator<I> {
    /// Make a new VRegAllocator.
    pub fn new() -> Self {
        Self {
            next_vreg: first_user_vreg_index(),
            vreg_types: vec![],
            reftyped_vregs_set: FxHashSet::default(),
            reftyped_vregs: vec![],
            _inst: core::marker::PhantomData::default(),
        }
    }

    /// Allocate a fresh ValueRegs.
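    ///
    /// A value is assigned one or two consecutive vregs depending on its
    /// type; for example, on a 64-bit target an `I128` value occupies two
    /// vregs while an `I64` value occupies one.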
    pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
        let v = self.next_vreg;
        let (regclasses, tys) = I::rc_for_type(ty)?;
        self.next_vreg += regclasses.len();
        if self.next_vreg >= VReg::MAX {
            return Err(CodegenError::CodeTooLarge);
        }

        let regs: ValueRegs<Reg> = match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
            // We can extend this if/when we support 32-bit targets; e.g.,
            // an i128 on a 32-bit machine will need up to four machine regs
            // for a `Value`.
            _ => panic!("Value must reside in 1 or 2 registers"),
        };
        for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
            self.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty);
        }
        Ok(regs)
    }

    /// Set the type of this virtual register.
    pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
        if self.vreg_types.len() <= vreg.index() {
            self.vreg_types.resize(vreg.index() + 1, ir::types::INVALID);
        }
        self.vreg_types[vreg.index()] = ty;
        if is_reftype(ty) {
            let vreg: VReg = vreg.into();
            if self.reftyped_vregs_set.insert(vreg) {
                self.reftyped_vregs.push(vreg);
            }
        }
    }
}

/// This structure tracks the large constants used in VCode that will be emitted separately by the
/// [MachBuffer].
///
/// First, during the lowering phase, constants are inserted using
/// [VCodeConstants.insert]; an intermediate handle, [VCodeConstant], tracks what constants are
/// used in this phase. Some deduplication is performed, when possible, as constant
/// values are inserted.
///
/// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
/// constants so that instructions can refer to the value's memory location. The [MachBuffer]
/// then writes the constant values to the buffer.
#[derive(Default)]
pub struct VCodeConstants {
    constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
    pool_uses: HashMap<Constant, VCodeConstant>,
    well_known_uses: HashMap<*const [u8], VCodeConstant>,
    u64s: HashMap<[u8; 8], VCodeConstant>,
}
impl VCodeConstants {
    /// Initialize the structure with the expected number of constants.
    pub fn with_capacity(expected_num_constants: usize) -> Self {
        Self {
            constants: PrimaryMap::with_capacity(expected_num_constants),
            pool_uses: HashMap::with_capacity(expected_num_constants),
            well_known_uses: HashMap::new(),
            u64s: HashMap::new(),
        }
    }

    /// Insert a constant; using this method indicates that a constant value will be used and thus
    /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
    /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
    /// [VCodeConstantData::Generated].
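    ///
    /// For example, inserting the same [VCodeConstantData::Pool] constant twice
    /// yields the same [VCodeConstant] handle, while two separately-inserted
    /// [VCodeConstantData::Generated] values always receive distinct handles.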
    pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
        match data {
            VCodeConstantData::Generated(_) => self.constants.push(data),
            VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
                None => {
                    let vcode_constant = self.constants.push(data);
                    self.pool_uses.insert(constant, vcode_constant);
                    vcode_constant
                }
                Some(&vcode_constant) => vcode_constant,
            },
            VCodeConstantData::WellKnown(data_ref) => {
                match self.well_known_uses.entry(data_ref as *const [u8]) {
                    Entry::Vacant(v) => {
                        let vcode_constant = self.constants.push(data);
                        v.insert(vcode_constant);
                        vcode_constant
                    }
                    Entry::Occupied(o) => *o.get(),
                }
            }
            VCodeConstantData::U64(value) => match self.u64s.entry(value) {
                Entry::Vacant(v) => {
                    let vcode_constant = self.constants.push(data);
                    v.insert(vcode_constant);
                    vcode_constant
                }
                Entry::Occupied(o) => *o.get(),
            },
        }
    }

    /// Return the number of constants inserted.
    pub fn len(&self) -> usize {
        self.constants.len()
    }

    /// Iterate over the [VCodeConstant] keys inserted in this structure.
    pub fn keys(&self) -> Keys<VCodeConstant> {
        self.constants.keys()
    }

    /// Iterate over the [VCodeConstant] keys and the data (as a byte slice) inserted in this
    /// structure.
    pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
        self.constants.iter()
    }
}

/// A use of a constant by one or more VCode instructions; see [VCodeConstants].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct VCodeConstant(u32);
entity_impl!(VCodeConstant);

/// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
/// these separately instead of as raw byte buffers allows us to avoid some duplication.
pub enum VCodeConstantData {
    /// A constant already present in the Cranelift IR
    /// [ConstantPool](crate::ir::constant::ConstantPool).
    Pool(Constant, ConstantData),
    /// A reference to a well-known constant value that is statically encoded within the compiler.
    WellKnown(&'static [u8]),
    /// A constant value generated during lowering; the value may depend on the instruction context
    /// which makes it difficult to de-duplicate--if possible, use other variants.
    Generated(ConstantData),
    /// A constant of at most 64 bits. These are deduplicated as
    /// well. Stored as a fixed-size array of `u8` so that we do not
    /// encounter endianness problems when cross-compiling.
    U64([u8; 8]),
}
impl VCodeConstantData {
    /// Retrieve the constant data as a byte slice.
    pub fn as_slice(&self) -> &[u8] {
        match self {
            VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
            VCodeConstantData::WellKnown(d) => d,
            VCodeConstantData::U64(value) => &value[..],
        }
    }

    /// Calculate the alignment of the constant data.
    pub fn alignment(&self) -> u32 {
        if self.as_slice().len() <= 8 {
            8
        } else {
            16
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::mem::size_of;

    #[test]
    fn size_of_constant_structs() {
        assert_eq!(size_of::<Constant>(), 4);
        assert_eq!(size_of::<VCodeConstant>(), 4);
        assert_eq!(size_of::<ConstantData>(), 24);
        assert_eq!(size_of::<VCodeConstantData>(), 32);
        assert_eq!(
            size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
            24
        );
        // TODO The VCodeConstants structure's memory size could be further optimized.
        // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
        // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
    }
}
1589}