cranelift_codegen/machinst/abi.rs
//! Implementation of a vanilla ABI, shared between several machines. The
//! implementation here assumes that arguments will be passed in registers
//! first, then additional args on the stack; that the stack grows downward,
//! contains a standard frame (return address and frame pointer), and the
//! compiler is otherwise free to allocate space below that with its choice of
//! layout; and that the machine has some notion of caller- and callee-save
//! registers. Most modern machines, e.g. x86-64 and AArch64, should fit this
//! mold and thus both of these backends use this shared implementation.
//!
//! See the documentation in specific machine backends for the "instantiation"
//! of this generic ABI, i.e., which registers are caller/callee-save, arguments
//! and return values, and any other special requirements.
//!
//! For now the implementation here assumes a 64-bit machine, but we intend to
//! make this 32/64-bit-generic shortly.
//!
//! # Vanilla ABI
//!
//! First, arguments and return values are passed in registers up to a certain
//! fixed count, after which they overflow onto the stack. Multiple return
//! values either fit in registers, or are returned in a separate return-value
//! area on the stack, given by a hidden extra parameter.
//!
//! Note that the exact stack layout is up to us. We settled on the
//! below design based on several requirements. In particular, we need
//! to be able to generate instructions (or instruction sequences) to
//! access arguments, stack slots, and spill slots before we know how
//! many spill slots or clobber-saves there will be, because of our
//! pass structure. We also prefer positive offsets to negative
//! offsets because of an asymmetry in some machines' addressing modes
//! (e.g., on AArch64, positive offsets have a larger possible range
//! without a long-form sequence to synthesize an arbitrary
//! offset). We also need clobber-save registers to be "near" the
//! frame pointer: Windows unwind information requires it to be within
//! 240 bytes of RBP. Finally, it is not allowed to access memory
//! below the current SP value.
//!
//! We assume that a prologue first pushes the frame pointer (and
//! return address above that, if the machine does not do that in
//! hardware). We set FP to point to this two-word frame record. We
//! store all other frame slots below this two-word frame record, with
//! the stack pointer remaining at or below this fixed frame storage
//! for the rest of the function. We can then access frame storage
//! slots using positive offsets from SP. In order to allow codegen
//! for the latter before knowing how SP might be adjusted around
//! callsites, we implement a "nominal SP" tracking feature by which a
//! fixup (distance between actual SP and a "nominal" SP) is known at
//! each instruction.
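//!
//! For example (illustrative numbers): if a stack slot lives at nominal
//! SP + 32 and a callsite has pushed 16 bytes of outgoing arguments, the
//! tracked fixup is 16, so the slot is actually addressed as real SP + 48:
//!
//! ```plain
//! nominal SP = real SP + 16   (fixup known at this instruction)
//! slot addr  = nominal SP + 32 = real SP + 48
//! ```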
//!
//! Note that if we ever support dynamic stack-space allocation (for
//! `alloca`), we will need a way to reference spill slots and stack
//! slots without "nominal SP", because we will no longer be able to
//! know a static offset from SP to the slots at any particular
//! program point. Probably the best solution at that point will be to
//! revert to using the frame pointer as the reference for all slots,
//! and creating a "nominal FP" synthetic addressing mode (analogous
//! to "nominal SP" today) to allow generating spill/reload and
//! stackslot accesses before we know how large the clobber-saves will
//! be.
//!
//! # Stack Layout
//!
//! The stack looks like:
//!
//! ```plain
//! (high address)
//!
//!                              +---------------------------+
//!                              |          ...              |
//!                              | stack args                |
//!                              | (accessed via FP)         |
//!                              +---------------------------+
//! SP at function entry ----->  | return address            |
//!                              +---------------------------+
//! FP after prologue -------->  | FP (pushed by prologue)   |
//!                              +---------------------------+
//!                              |          ...              |
//!                              | clobbered callee-saves    |
//! unwind-frame base -------->  | (pushed by prologue)      |
//!                              +---------------------------+
//!                              |          ...              |
//!                              | spill slots               |
//!                              | (accessed via nominal SP) |
//!                              |          ...              |
//!                              | stack slots               |
//!                              | (accessed via nominal SP) |
//! nominal SP --------------->  | (alloc'd by prologue)     |
//! (SP at end of prologue)      +---------------------------+
//!                              | [alignment as needed]     |
//!                              |          ...              |
//!                              | args for call             |
//! SP before making a call -->  | (pushed at callsite)      |
//!                              +---------------------------+
//!
//! (low address)
//! ```
//!
//! # Multi-value Returns
//!
//! We support multi-value returns by using multiple return-value
//! registers. In some cases this is an extension of the base system
//! ABI. See each platform's `abi.rs` implementation for details.

use crate::binemit::StackMap;
use crate::entity::{PrimaryMap, SecondaryMap};
use crate::fx::FxHashMap;
use crate::ir::types::*;
use crate::ir::{ArgumentExtension, ArgumentPurpose, DynamicStackSlot, Signature, StackSlot};
use crate::isa::TargetIsa;
use crate::settings;
use crate::settings::ProbestackStrategy;
use crate::CodegenResult;
use crate::{ir, isa};
use crate::{machinst::*, trace};
use alloc::vec::Vec;
use regalloc2::{PReg, PRegSet};
use smallvec::{smallvec, SmallVec};
use std::collections::HashMap;
use std::convert::TryFrom;
use std::marker::PhantomData;
use std::mem;

/// A small vector of instructions (with some reasonable size); appropriate for
/// a small fixed sequence implementing one operation.
pub type SmallInstVec<I> = SmallVec<[I; 4]>;

/// A type used by backends to track argument-binding info in the "args"
/// pseudoinst. The pseudoinst holds a vec of `ArgPair` structs.
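///
/// For example (hypothetical vreg/preg names), an `args` pseudoinst carrying
/// `ArgPair { vreg: v0, preg: x0 }` defines `v0` and constrains it to the
/// physical register `x0` at that program point.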
#[derive(Clone, Debug)]
pub struct ArgPair {
    /// The vreg that is defined by this args pseudoinst.
    pub vreg: Writable<Reg>,
    /// The preg that the arg arrives in; this constrains the vreg's
    /// placement at the pseudoinst.
    pub preg: Reg,
}

/// A type used by backends to track return register binding info in the "ret"
/// pseudoinst. The pseudoinst holds a vec of `RetPair` structs.
#[derive(Clone, Debug)]
pub struct RetPair {
    /// The vreg that is returned by this pseudoinst.
    pub vreg: Reg,
    /// The preg that the value is returned through; this constrains the
    /// vreg's placement at the pseudoinst.
    pub preg: Reg,
}

/// A location for (part of) an argument or return value. These "storage slots"
/// are specified for each register-sized part of an argument.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ABIArgSlot {
    /// In a real register.
    Reg {
        /// Register that holds this arg.
        reg: RealReg,
        /// Value type of this arg.
        ty: ir::Type,
        /// Should this arg be zero- or sign-extended?
        extension: ir::ArgumentExtension,
    },
    /// Arguments only: on stack, at given offset from SP at entry.
    Stack {
        /// Offset of this arg relative to the base of stack args.
        offset: i64,
        /// Value type of this arg.
        ty: ir::Type,
        /// Should this arg be zero- or sign-extended?
        extension: ir::ArgumentExtension,
    },
}

impl ABIArgSlot {
    /// The type of the value that will be stored in this slot.
    pub fn get_type(&self) -> ir::Type {
        match self {
            ABIArgSlot::Reg { ty, .. } => *ty,
            ABIArgSlot::Stack { ty, .. } => *ty,
        }
    }
}

/// A vector of `ABIArgSlot`s. Inline capacity for one element because basically
/// 100% of values use one slot. Only `i128`s need multiple slots, and they are
/// super rare (and never happen with Wasm).
pub type ABIArgSlotVec = SmallVec<[ABIArgSlot; 1]>;

/// An ABIArg is composed of one or more parts. This allows for a CLIF-level
/// Value to be passed with its parts in more than one location at the ABI
/// level. For example, a 128-bit integer may be passed in two 64-bit registers,
/// or even a 64-bit register and a 64-bit stack slot, on a 64-bit machine. The
/// number of "parts" should correspond to the number of registers used to store
/// this type according to the machine backend.
///
/// As an invariant, the `purpose` for every part must match. As a further
/// invariant, a `StructArg` part cannot appear with any other part.
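///
/// For example (hypothetical registers `x0`/`x1`), an `i128` passed in two
/// 64-bit registers might be described as:
///
/// ```plain
/// ABIArg::Slots {
///     slots: [
///         ABIArgSlot::Reg { reg: x0, ty: I64, extension: None },
///         ABIArgSlot::Reg { reg: x1, ty: I64, extension: None },
///     ],
///     purpose: ArgumentPurpose::Normal,
/// }
/// ```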
#[derive(Clone, Debug)]
pub enum ABIArg {
    /// Storage slots (registers or stack locations) for each part of the
    /// argument value. The number of slots must equal the number of register
    /// parts used to store a value of this type.
    Slots {
        /// Slots, one per register part.
        slots: ABIArgSlotVec,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
    /// Structure argument. We reserve stack space for it, but the CLIF-level
    /// semantics are a little weird: the value passed to the call instruction,
    /// and received in the corresponding block param, is a *pointer*. On the
    /// caller side, we memcpy the data from the passed-in pointer to the stack
    /// area; on the callee side, we compute a pointer to this stack area and
    /// provide that as the argument's value.
    StructArg {
        /// Register or stack slot holding a pointer to the buffer as passed
        /// by the caller to the callee. If None, the ABI defines the buffer
        /// to reside at a well-known location (i.e. at `offset` below).
        pointer: Option<ABIArgSlot>,
        /// Offset of this arg relative to base of stack args.
        offset: i64,
        /// Size of this arg on the stack.
        size: u64,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
    /// Implicit argument. Similar to a `StructArg`, except that at the CLIF
    /// level we have the target type, not a pointer type. The argument is
    /// still implicitly passed by reference.
    ImplicitPtrArg {
        /// Register or stack slot holding a pointer to the buffer.
        pointer: ABIArgSlot,
        /// Offset of the argument buffer.
        offset: i64,
        /// Type of the implicit argument.
        ty: Type,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
}

impl ABIArg {
    /// Create an ABIArg from one register.
    pub fn reg(
        reg: RealReg,
        ty: ir::Type,
        extension: ir::ArgumentExtension,
        purpose: ir::ArgumentPurpose,
    ) -> ABIArg {
        ABIArg::Slots {
            slots: smallvec![ABIArgSlot::Reg { reg, ty, extension }],
            purpose,
        }
    }

    /// Create an ABIArg from one stack slot.
    pub fn stack(
        offset: i64,
        ty: ir::Type,
        extension: ir::ArgumentExtension,
        purpose: ir::ArgumentPurpose,
    ) -> ABIArg {
        ABIArg::Slots {
            slots: smallvec![ABIArgSlot::Stack {
                offset,
                ty,
                extension,
            }],
            purpose,
        }
    }
}

/// Are we computing information about arguments or return values? Much of the
/// handling is factored out into common routines; this enum allows us to
/// distinguish which case we're handling.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArgsOrRets {
    /// Arguments.
    Args,
    /// Return values.
    Rets,
}

/// Abstract location for a machine-specific ABI impl to translate into the
/// appropriate addressing mode.
#[derive(Clone, Copy, Debug)]
pub enum StackAMode {
    /// Offset from the frame pointer, possibly making use of a specific type
    /// for a scaled indexing operation.
    FPOffset(i64, ir::Type),
    /// Offset from the nominal stack pointer, possibly making use of a specific
    /// type for a scaled indexing operation.
    NominalSPOffset(i64, ir::Type),
    /// Offset from the real stack pointer, possibly making use of a specific
    /// type for a scaled indexing operation.
    SPOffset(i64, ir::Type),
}

impl StackAMode {
    /// Offset by an addend.
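    ///
    /// For example, `StackAMode::SPOffset(8, I64).offset(16)` yields
    /// `StackAMode::SPOffset(24, I64)`.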
    pub fn offset(self, addend: i64) -> Self {
        match self {
            StackAMode::FPOffset(off, ty) => StackAMode::FPOffset(off + addend, ty),
            StackAMode::NominalSPOffset(off, ty) => StackAMode::NominalSPOffset(off + addend, ty),
            StackAMode::SPOffset(off, ty) => StackAMode::SPOffset(off + addend, ty),
        }
    }
}

/// Trait implemented by machine-specific backend to represent ISA flags.
pub trait IsaFlags: Clone {
    /// Get a flag indicating whether forward-edge CFI is enabled.
    fn is_forward_edge_cfi_enabled(&self) -> bool {
        false
    }
}

/// Used as an out-parameter to accumulate a sequence of `ABIArg`s in
/// `ABIMachineSpec::compute_arg_locs`. Wraps the shared allocation for all
/// `ABIArg`s in `SigSet` and exposes just the args for the current
/// `compute_arg_locs` call.
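///
/// A backend's `compute_arg_locs` pushes one `ABIArg` per parameter as it
/// assigns locations, e.g. (sketch): `args.push(ABIArg::reg(reg, ty, ext,
/// purpose))`.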
pub struct ArgsAccumulator<'a> {
    sig_set_abi_args: &'a mut Vec<ABIArg>,
    start: usize,
}

impl<'a> ArgsAccumulator<'a> {
    fn new(sig_set_abi_args: &'a mut Vec<ABIArg>) -> Self {
        let start = sig_set_abi_args.len();
        ArgsAccumulator {
            sig_set_abi_args,
            start,
        }
    }

    /// Push an `ABIArg` onto the accumulated list.
    #[inline]
    pub fn push(&mut self, arg: ABIArg) {
        self.sig_set_abi_args.push(arg)
    }

    /// The args accumulated so far by this `compute_arg_locs` call.
    #[inline]
    pub fn args(&self) -> &[ABIArg] {
        &self.sig_set_abi_args[self.start..]
    }

    /// Mutable view of the args accumulated so far.
    #[inline]
    pub fn args_mut(&mut self) -> &mut [ABIArg] {
        &mut self.sig_set_abi_args[self.start..]
    }
}

/// Trait implemented by machine-specific backend to provide information about
/// register assignments and to allow generating the specific instructions for
/// stack loads/saves, prologues/epilogues, etc.
pub trait ABIMachineSpec {
    /// The instruction type.
    type I: VCodeInst;

    /// The ISA flags type.
    type F: IsaFlags;

    /// Returns the number of bits in a word: 32 or 64 for a 32- or 64-bit
    /// architecture, respectively.
    fn word_bits() -> u32;

    /// Returns the number of bytes in a word.
    fn word_bytes() -> u32 {
        Self::word_bits() / 8
    }

    /// Returns word-size integer type.
    fn word_type() -> Type {
        match Self::word_bits() {
            32 => I32,
            64 => I64,
            _ => unreachable!(),
        }
    }

    /// Returns word register class.
    fn word_reg_class() -> RegClass {
        RegClass::Int
    }

    /// Returns required stack alignment in bytes.
    fn stack_align(call_conv: isa::CallConv) -> u32;

    /// Process a list of parameters or return values and allocate them to registers
    /// and stack slots.
    ///
    /// The argument locations should be pushed onto the given `ArgsAccumulator`
    /// in order.
    ///
    /// Returns the stack space used (rounded up as alignment requires), and,
    /// if `add_ret_area_ptr` was passed, the index of the extra synthetic arg
    /// that was added.
    fn compute_arg_locs<'a, I>(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: I,
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        args: ArgsAccumulator<'_>,
    ) -> CodegenResult<(u32, Option<usize>)>
    where
        I: IntoIterator<Item = &'a ir::AbiParam>;

    /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return
    /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg).
    fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64;

    /// Generate a load from the stack.
    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I;

    /// Generate a store to the stack.
    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I;

    /// Generate a move.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I;

    /// Generate an integer-extend operation.
    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        is_signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Self::I;

    /// Generate an "args" pseudo-instruction to capture input args in
    /// registers.
    fn gen_args(isa_flags: &Self::F, args: Vec<ArgPair>) -> Self::I;

    /// Generate a return instruction.
    fn gen_ret(setup_frame: bool, isa_flags: &Self::F, rets: Vec<RetPair>) -> Self::I;

    /// Generate an add-with-immediate. Note that even if this uses a scratch
    /// register, it must satisfy two requirements:
    ///
    /// - The add-imm sequence must only clobber caller-save registers, because
    ///   it will be placed in the prologue before the clobbered callee-save
    ///   registers are saved.
    ///
    /// - The add-imm sequence must work correctly when `from_reg` and/or
    ///   `into_reg` are the register returned by `get_stacklimit_reg()`.
    fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Self::I>;

    /// Generate a sequence that traps with a `TrapCode::StackOverflow` code if
    /// the stack pointer is less than the given limit register (assuming the
    /// stack grows downward).
    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I>;

    /// Generate an instruction to compute an address of a stack slot (FP- or
    /// SP-based offset).
    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I;

    /// Get a fixed register to use to compute a stack limit. This is needed for
    /// certain sequences generated after the register allocator has already
    /// run. This must satisfy two requirements:
    ///
    /// - It must be a caller-save register, because it will be clobbered in the
    ///   prologue before the clobbered callee-save registers are saved.
    ///
    /// - It must be safe to pass as an argument and/or destination to
    ///   `gen_add_imm()`. This is relevant when an addition with a large
    ///   immediate needs its own temporary; it cannot use the same fixed
    ///   temporary as this one.
    fn get_stacklimit_reg() -> Reg;

    /// Generate a load from the given [base+offset] address.
    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I;

    /// Generate a store to the given [base+offset] address.
    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I;

    /// Adjust the stack pointer up or down.
    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I>;

    /// Generate a meta-instruction that adjusts the nominal SP offset.
    fn gen_nominal_sp_adj(amount: i32) -> Self::I;

    /// Generates the mandatory part of the prologue, irrespective of whether
    /// the usual frame-setup sequence for this architecture is required or not,
    /// e.g. extra unwind instructions.
    fn gen_prologue_start(
        _setup_frame: bool,
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &Self::F,
    ) -> SmallInstVec<Self::I> {
        // By default, generates nothing.
        smallvec![]
    }

    /// Generate the usual frame-setup sequence for this architecture: e.g.,
    /// `push rbp / mov rbp, rsp` on x86-64, or `stp fp, lr, [sp, #-16]!` on
    /// AArch64.
    fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Self::I>;

    /// Generate the usual frame-restore sequence for this architecture.
    fn gen_epilogue_frame_restore(flags: &settings::Flags) -> SmallInstVec<Self::I>;

    /// Generate a probestack call.
    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32);

    /// Generate an inline stack probe.
    fn gen_inline_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32, guard_size: u32);

    /// Get all clobbered registers that are callee-saved according to the ABI;
    /// the result contains the registers in a sorted order.
    fn get_clobbered_callee_saves(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        sig: &Signature,
        regs: &[Writable<RealReg>],
    ) -> Vec<Writable<RealReg>>;

    /// Determine whether it is necessary to generate the usual frame-setup
    /// sequence (refer to `gen_prologue_frame_setup()`).
    fn is_frame_setup_needed(
        is_leaf: bool,
        stack_args_size: u32,
        num_clobbered_callee_saves: usize,
        fixed_frame_storage_size: u32,
    ) -> bool;

    /// Generate a clobber-save sequence. The implementation here should return
    /// a sequence of instructions that "push" or otherwise save to the stack all
    /// registers written/modified by the function body that are callee-saved.
    /// The sequence of instructions should adjust the stack pointer downward,
    /// and should align as necessary according to ABI requirements.
    ///
    /// Returns stack bytes used as well as instructions. Does not adjust
    /// nominal SP offset; caller will do that.
    fn gen_clobber_save(
        call_conv: isa::CallConv,
        setup_frame: bool,
        flags: &settings::Flags,
        clobbered_callee_saves: &[Writable<RealReg>],
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> (u64, SmallVec<[Self::I; 16]>);

    /// Generate a clobber-restore sequence. This sequence should perform the
    /// opposite of the clobber-save sequence generated above, assuming that SP
    /// going into the sequence is at the same point that it was left when the
    /// clobber-save sequence finished.
    fn gen_clobber_restore(
        call_conv: isa::CallConv,
        sig: &Signature,
        flags: &settings::Flags,
        clobbers: &[Writable<RealReg>],
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> SmallVec<[Self::I; 16]>;

    /// Generate a call instruction/sequence. This method is provided one
    /// temporary register to use to synthesize the called address, if needed.
    fn gen_call(
        dest: &CallDest,
        uses: CallArgList,
        defs: CallRetList,
        clobbers: PRegSet,
        opcode: ir::Opcode,
        tmp: Writable<Reg>,
        callee_conv: isa::CallConv,
        caller_conv: isa::CallConv,
    ) -> SmallVec<[Self::I; 2]>;

    /// Generate a memcpy invocation. Used to set up struct
    /// args. Takes `src`, `dst` as read-only inputs and passes a temporary
    /// allocator.
    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]>;

    /// Get the number of spillslots required for the given register-class.
    fn get_number_of_spillslots_for_value(rc: RegClass, target_vector_bytes: u32) -> u32;

    /// Get the current virtual-SP offset from an instruction-emission state.
    fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64;

    /// Get the "nominal SP to FP" offset from an instruction-emission state.
    fn get_nominal_sp_to_fp(s: &<Self::I as MachInstEmit>::State) -> i64;

    /// Get all caller-save registers, that is, registers that we expect
    /// not to be saved across a call to a callee with the given ABI.
    fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet;

    /// Get the needed extension mode, given the mode attached to the argument
    /// in the signature and the calling convention. The input (the attribute in
    /// the signature) specifies what extension type should be done *if* the ABI
    /// requires extension to the full register; this method's return value
    /// indicates whether the extension actually *will* be done.
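    ///
    /// For example (illustrative): an ABI that always widens small integer
    /// args to a full register would return `specified` unchanged, while one
    /// that does not would return `ArgumentExtension::None`.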
    fn get_ext_mode(
        call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension;
}

/// The id of an ABI signature within the `SigSet`.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Sig(u32);
cranelift_entity::entity_impl!(Sig);

impl Sig {
    fn prev(self) -> Option<Sig> {
        self.0.checked_sub(1).map(Sig)
    }
}

/// ABI information shared between body (callee) and caller.
#[derive(Clone, Debug)]
pub struct SigData {
    /// Currently both return values and arguments are stored in a contiguous
    /// region of the `SigSet::abi_args` vector.
    ///
    /// ```plain
    ///              +----------------------------------------------+
    ///              | return values                                |
    ///              | ...                                          |
    /// rets_end --> +----------------------------------------------+
    ///              | arguments                                    |
    ///              | ...                                          |
    /// args_end --> +----------------------------------------------+
    /// ```
    ///
    /// Note we only store two offsets, as rets_end == args_start and
    /// rets_start == prev.args_end.
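    ///
    /// For example (illustrative indices): if the previous signature's
    /// args_end is 4 and this signature has two return values and three
    /// arguments, then rets_end = 6 and args_end = 9, so the rets live at
    /// `abi_args[4..6]` and the args at `abi_args[6..9]`.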
    ///
    /// Argument location ending offset (regs or stack slots). Stack offsets are relative to
    /// SP on entry to function.
    ///
    /// This is an index into the `SigSet::abi_args`.
    args_end: u32,

    /// Return-value location ending offset. Stack offsets are relative to the return-area
    /// pointer.
    ///
    /// This is an index into the `SigSet::abi_args`.
    rets_end: u32,

    /// Space on stack used to store arguments. We're storing the size in u32 to
    /// reduce the size of the struct.
    sized_stack_arg_space: u32,

    /// Space on stack used to store return values. We're storing the size in u32 to
    /// reduce the size of the struct.
    sized_stack_ret_space: u32,

    /// Index in `args` of the stack-return-value-area argument.
    stack_ret_arg: Option<u16>,

    /// Calling convention used.
    call_conv: isa::CallConv,
}

impl SigData {
    /// Get total stack space required for arguments.
    pub fn sized_stack_arg_space(&self) -> i64 {
        self.sized_stack_arg_space.into()
    }

    /// Get total stack space required for return values.
    pub fn sized_stack_ret_space(&self) -> i64 {
        self.sized_stack_ret_space.into()
    }

    /// Get calling convention used.
    pub fn call_conv(&self) -> isa::CallConv {
        self.call_conv
    }
}

/// A (mostly) deduplicated set of ABI signatures.
///
/// We say "mostly" because we do not dedupe between signatures interned via
/// `ir::SigRef` (direct and indirect calls; the vast majority of signatures in
/// this set) vs via `ir::Signature` (the callee itself and libcalls). Doing
/// this final bit of deduplication would require filling out the
/// `ir_signature_to_abi_sig`, which is a bunch of allocations (not just the
/// hash map itself but params and returns vecs in each signature) that we want
/// to avoid.
///
/// In general, prefer using the `ir::SigRef`-taking methods to the
/// `ir::Signature`-taking methods when you can get away with it, as they don't
/// require cloning non-copy types that will trigger heap allocations.
///
/// This type can be indexed by `Sig` to access its associated `SigData`.
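///
/// A minimal usage sketch (assuming some `ABIMachineSpec` impl `M`; the setup
/// of `func` and `flags` is elided):
///
/// ```plain
/// let sigs = SigSet::new::<M>(&func, &flags)?;
/// let sig = sigs.abi_sig_for_signature(&func.signature);
/// let args = sigs.args(sig);
/// ```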
pub struct SigSet {
    /// Interned `ir::Signature`s that we already have an ABI signature for.
    ir_signature_to_abi_sig: FxHashMap<ir::Signature, Sig>,

    /// Interned `ir::SigRef`s that we already have an ABI signature for.
    ir_sig_ref_to_abi_sig: SecondaryMap<ir::SigRef, Option<Sig>>,

    /// A single, shared allocation for all `ABIArg`s used by all
    /// `SigData`s. Each `SigData` references its args/rets via indices into
    /// this allocation.
    abi_args: Vec<ABIArg>,

    /// The actual ABI signatures, keyed by `Sig`.
    sigs: PrimaryMap<Sig, SigData>,
}

impl SigSet {
    /// Construct a new `SigSet`, interning all of the signatures used by the
    /// given function.
    pub fn new<M>(func: &ir::Function, flags: &settings::Flags) -> CodegenResult<Self>
    where
        M: ABIMachineSpec,
    {
        // Rough capacity estimate: assume roughly six `ABIArg` entries per
        // signature, to avoid most reallocations of `abi_args`.
        let arg_estimate = func.dfg.signatures.len() * 6;

        let mut sigs = SigSet {
            ir_signature_to_abi_sig: FxHashMap::default(),
            ir_sig_ref_to_abi_sig: SecondaryMap::with_capacity(func.dfg.signatures.len()),
            abi_args: Vec::with_capacity(arg_estimate),
            sigs: PrimaryMap::with_capacity(1 + func.dfg.signatures.len()),
        };

        sigs.make_abi_sig_from_ir_signature::<M>(func.signature.clone(), flags)?;
        for sig_ref in func.dfg.signatures.keys() {
            sigs.make_abi_sig_from_ir_sig_ref::<M>(sig_ref, &func.dfg, flags)?;
        }

        Ok(sigs)
    }

    /// Have we already interned an ABI signature for the given `ir::Signature`?
    pub fn have_abi_sig_for_signature(&self, signature: &ir::Signature) -> bool {
        self.ir_signature_to_abi_sig.contains_key(signature)
    }

    /// Construct and intern an ABI signature for the given `ir::Signature`.
    pub fn make_abi_sig_from_ir_signature<M>(
        &mut self,
        signature: ir::Signature,
        flags: &settings::Flags,
    ) -> CodegenResult<Sig>
    where
        M: ABIMachineSpec,
    {
        // Because the `HashMap` entry API requires taking ownership of the
        // lookup key -- and we want to avoid unnecessary clones of
        // `ir::Signature`s, even at the cost of duplicate lookups -- we can't
        // have a single, get-or-create-style method for interning
        // `ir::Signature`s into ABI signatures. So at least (debug) assert that
        // we aren't creating duplicate ABI signatures for the same
        // `ir::Signature`.
        debug_assert!(!self.have_abi_sig_for_signature(&signature));

        let sig_data = self.from_func_sig::<M>(&signature, flags)?;
        let sig = self.sigs.push(sig_data);
        self.ir_signature_to_abi_sig.insert(signature, sig);
        Ok(sig)
    }

    fn make_abi_sig_from_ir_sig_ref<M>(
        &mut self,
        sig_ref: ir::SigRef,
        dfg: &ir::DataFlowGraph,
        flags: &settings::Flags,
    ) -> CodegenResult<Sig>
    where
        M: ABIMachineSpec,
    {
        if let Some(sig) = self.ir_sig_ref_to_abi_sig[sig_ref] {
            return Ok(sig);
        }
        let signature = &dfg.signatures[sig_ref];
        let sig_data = self.from_func_sig::<M>(signature, flags)?;
        let sig = self.sigs.push(sig_data);
        self.ir_sig_ref_to_abi_sig[sig_ref] = Some(sig);
        Ok(sig)
    }

    /// Get the already-interned ABI signature id for the given `ir::SigRef`.
    pub fn abi_sig_for_sig_ref(&self, sig_ref: ir::SigRef) -> Sig {
        self.ir_sig_ref_to_abi_sig
            .get(sig_ref)
            // Should have a secondary map entry...
            .expect("must call `make_abi_sig_from_ir_sig_ref` before `get_abi_sig_for_sig_ref`")
            // ...and that entry should be initialized.
            .expect("must call `make_abi_sig_from_ir_sig_ref` before `get_abi_sig_for_sig_ref`")
    }

    /// Get the already-interned ABI signature id for the given `ir::Signature`.
    pub fn abi_sig_for_signature(&self, signature: &ir::Signature) -> Sig {
        self.ir_signature_to_abi_sig
            .get(signature)
            .copied()
            .expect("must call `make_abi_sig_from_ir_signature` before `get_abi_sig_for_signature`")
    }

    /// Make an ABI signature (`SigData`) from a CLIF `ir::Signature`, interning
    /// the computed arg/ret locations into this `SigSet`.
    pub fn from_func_sig<M: ABIMachineSpec>(
        &mut self,
        sig: &ir::Signature,
        flags: &settings::Flags,
    ) -> CodegenResult<SigData> {
        let sret = missing_struct_return(sig);
        let returns = sret.as_ref().into_iter().chain(&sig.returns);

        // Compute args and retvals from signature. Handle retvals first,
        // because we may need to add a return-area arg to the args.

        // NOTE: We rely on the order of the args (rets -> args) inserted to compute the offsets in
        // `SigSet::args()` and `SigSet::rets()`. Therefore, we cannot change the two
        // compute_arg_locs order.
        let (sized_stack_ret_space, _) = M::compute_arg_locs(
            sig.call_conv,
            flags,
            returns,
            ArgsOrRets::Rets,
            /* extra ret-area ptr = */ false,
            ArgsAccumulator::new(&mut self.abi_args),
        )?;
        let rets_end = u32::try_from(self.abi_args.len()).unwrap();

        let need_stack_return_area = sized_stack_ret_space > 0;
        let (sized_stack_arg_space, stack_ret_arg) = M::compute_arg_locs(
            sig.call_conv,
            flags,
            &sig.params,
            ArgsOrRets::Args,
            need_stack_return_area,
            ArgsAccumulator::new(&mut self.abi_args),
        )?;
        let args_end = u32::try_from(self.abi_args.len()).unwrap();

        trace!(
            "ABISig: sig {:?} => args end = {} rets end = {}
             arg stack = {} ret stack = {} stack_ret_arg = {:?}",
            sig,
            args_end,
            rets_end,
            sized_stack_arg_space,
            sized_stack_ret_space,
            stack_ret_arg,
        );

        let stack_ret_arg = stack_ret_arg.map(|s| u16::try_from(s).unwrap());
        Ok(SigData {
            args_end,
            rets_end,
            sized_stack_arg_space,
            sized_stack_ret_space,
            stack_ret_arg,
            call_conv: sig.call_conv,
        })
    }

    /// Get this signature's ABI arguments.
    pub fn args(&self, sig: Sig) -> &[ABIArg] {
        let sig_data = &self.sigs[sig];
        // Please see comments in `SigSet::from_func_sig` for how we store the offsets.
        let start = usize::try_from(sig_data.rets_end).unwrap();
        let end = usize::try_from(sig_data.args_end).unwrap();
        &self.abi_args[start..end]
    }

    /// Get information specifying how to pass the implicit pointer
    /// to the return-value area on the stack, if required.
    pub fn get_ret_arg(&self, sig: Sig) -> Option<ABIArg> {
        let sig_data = &self.sigs[sig];
        if let Some(i) = sig_data.stack_ret_arg {
            Some(self.args(sig)[usize::from(i)].clone())
        } else {
            None
        }
    }

    /// Get information specifying how to pass one argument.
    pub fn get_arg(&self, sig: Sig, idx: usize) -> ABIArg {
        self.args(sig)[idx].clone()
    }

    /// Get this signature's ABI returns.
    pub fn rets(&self, sig: Sig) -> &[ABIArg] {
        let sig_data = &self.sigs[sig];
        // Please see comments in `SigSet::from_func_sig` for how we store the offsets.
        let start = usize::try_from(sig.prev().map_or(0, |prev| self.sigs[prev].args_end)).unwrap();
        let end = usize::try_from(sig_data.rets_end).unwrap();
        &self.abi_args[start..end]
    }

    /// Get information specifying how to pass one return value.
    pub fn get_ret(&self, sig: Sig, idx: usize) -> ABIArg {
        self.rets(sig)[idx].clone()
    }

    /// Return all clobbers for the callsite.
    pub fn call_clobbers<M: ABIMachineSpec>(&self, sig: Sig) -> PRegSet {
        let sig_data = &self.sigs[sig];
        // Get clobbers: all caller-saves. These may include return value
        // regs, which we will remove from the clobber set below.
        let mut clobbers = M::get_regs_clobbered_by_call(sig_data.call_conv);

        // Remove retval regs from clobbers. Skip StructRets: these
        // are not, semantically, returns at the CLIF level, so we
        // treat such a value as a clobber instead.
        for ret in self.rets(sig) {
            if let &ABIArg::Slots {
                ref slots, purpose, ..
            } = ret
            {
                if purpose == ir::ArgumentPurpose::StructReturn {
                    continue;
                }
                for slot in slots {
                    match slot {
                        &ABIArgSlot::Reg { reg, .. } => {
                            log::trace!("call_clobbers: retval reg {:?}", reg);
                            clobbers.remove(PReg::from(reg));
                        }
                        _ => {}
                    }
                }
            }
        }

        clobbers
    }

    /// Get the number of arguments expected.
    pub fn num_args(&self, sig: Sig) -> usize {
        let len = self.args(sig).len();
        if self.sigs[sig].stack_ret_arg.is_some() {
            len - 1
        } else {
            len
        }
    }

    /// Get the number of return values expected.
    pub fn num_rets(&self, sig: Sig) -> usize {
        self.rets(sig).len()
    }
}

// NB: we do _not_ implement `IndexMut` because these signatures are
// deduplicated and shared!
impl std::ops::Index<Sig> for SigSet {
    type Output = SigData;

    fn index(&self, sig: Sig) -> &Self::Output {
        &self.sigs[sig]
    }
}

/// ABI object for a function body.
pub struct Callee<M: ABIMachineSpec> {
    /// CLIF-level signature, possibly normalized.
    ir_sig: ir::Signature,
    /// Signature: arg and retval regs.
    sig: Sig,
    /// Defined dynamic types.
    dynamic_type_sizes: HashMap<Type, u32>,
    /// Offsets to each dynamic stackslot.
    dynamic_stackslots: PrimaryMap<DynamicStackSlot, u32>,
    /// Offsets to each sized stackslot.
    sized_stackslots: PrimaryMap<StackSlot, u32>,
    /// Total stack size of all stackslots.
    stackslots_size: u32,
    /// Stack size to be reserved for outgoing arguments.
    outgoing_args_size: u32,
    /// Register-argument defs, to be provided to the `args`
    /// pseudo-inst, and pregs to constrain them to.
    reg_args: Vec<ArgPair>,
    /// Clobbered registers, from regalloc.
    clobbered: Vec<Writable<RealReg>>,
    /// Total number of spillslots, including for 'dynamic' types, from regalloc.
    spillslots: Option<usize>,
    /// Storage allocated for the fixed part of the stack frame. This is
    /// usually the same as the total frame size below.
    fixed_frame_storage_size: u32,
    /// "Total frame size", as defined by "distance between FP and nominal SP".
    /// Some items are pushed below nominal SP, so the function may actually use
    /// more stack than this would otherwise imply. It is simply the initial
    /// frame/allocation size needed for stackslots and spillslots.
    total_frame_size: Option<u32>,
    /// The register holding the return-area pointer, if needed.
    ret_area_ptr: Option<Writable<Reg>>,
    /// Temp registers required for argument setup, if needed.
    arg_temp_reg: Vec<Option<Writable<Reg>>>,
    /// Calling convention this function expects.
    call_conv: isa::CallConv,
    /// The settings controlling this function's compilation.
    flags: settings::Flags,
    /// The ISA-specific flag values controlling this function's compilation.
    isa_flags: M::F,
    /// Whether or not this function is a "leaf", meaning it calls no other
    /// functions.
    is_leaf: bool,
    /// If this function has a stack limit specified, then `Reg` is where the
    /// stack limit will be located after the instructions specified have been
    /// executed.
    ///
    /// Note that this is intended for insertion into the prologue, if
    /// present. Also note that because the instructions here execute in the
    /// prologue this happens after legalization/register allocation/etc so we
    /// need to be extremely careful with each instruction. The instructions are
    /// manually register-allocated and carefully only use caller-saved
    /// registers and keep nothing live after this sequence of instructions.
    stack_limit: Option<(Reg, SmallInstVec<M::I>)>,
    /// Are we to invoke the probestack function in the prologue? If so,
    /// what is the minimum size at which we must invoke it?
    probestack_min_frame: Option<u32>,
    /// Whether it is necessary to generate the usual frame-setup sequence.
    setup_frame: bool,

    _mach: PhantomData<M>,
}

fn get_special_purpose_param_register(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    purpose: ir::ArgumentPurpose,
) -> Option<Reg> {
    let idx = f.signature.special_param_index(purpose)?;
    match &sigs.args(sig)[idx] {
        &ABIArg::Slots { ref slots, .. } => match &slots[0] {
            &ABIArgSlot::Reg { reg, .. } => Some(reg.into()),
            _ => None,
        },
        _ => None,
    }
}

impl<M: ABIMachineSpec> Callee<M> {
    /// Create a new body ABI instance.
    pub fn new<'a>(
        f: &ir::Function,
        isa: &dyn TargetIsa,
        isa_flags: &M::F,
        sigs: &SigSet,
    ) -> CodegenResult<Self> {
        trace!("ABI: func signature {:?}", f.signature);

        let flags = isa.flags().clone();
        let sig = sigs.abi_sig_for_signature(&f.signature);

        let call_conv = f.signature.call_conv;
        // Only these calling conventions are supported.
        debug_assert!(
            call_conv == isa::CallConv::SystemV
                || call_conv == isa::CallConv::Fast
                || call_conv == isa::CallConv::Cold
                || call_conv.extends_windows_fastcall()
                || call_conv == isa::CallConv::AppleAarch64
                || call_conv == isa::CallConv::WasmtimeSystemV
                || call_conv == isa::CallConv::WasmtimeAppleAarch64,
            "Unsupported calling convention: {:?}",
            call_conv
        );

        // Compute sized stackslot locations and total stackslot size.
        let mut sized_stack_offset: u32 = 0;
        let mut sized_stackslots = PrimaryMap::new();
        for (stackslot, data) in f.sized_stack_slots.iter() {
            let off = sized_stack_offset;
            sized_stack_offset += data.size;
            // Align the running offset up to the machine word size.
            let mask = M::word_bytes() - 1;
            sized_stack_offset = (sized_stack_offset + mask) & !mask;
            debug_assert_eq!(stackslot.as_u32() as usize, sized_stackslots.len());
            sized_stackslots.push(off);
        }

        // Compute dynamic stackslot locations and total stackslot size.
        let mut dynamic_stackslots = PrimaryMap::new();
        let mut dynamic_stack_offset: u32 = sized_stack_offset;
        for (stackslot, data) in f.dynamic_stack_slots.iter() {
            debug_assert_eq!(stackslot.as_u32() as usize, dynamic_stackslots.len());
            let off = dynamic_stack_offset;
            let ty = f
                .get_concrete_dynamic_ty(data.dyn_ty)
                .unwrap_or_else(|| panic!("invalid dynamic vector type: {}", data.dyn_ty));
            dynamic_stack_offset += isa.dynamic_vector_bytes(ty);
            let mask = M::word_bytes() - 1;
            dynamic_stack_offset = (dynamic_stack_offset + mask) & !mask;
            dynamic_stackslots.push(off);
        }
        let stackslots_size = dynamic_stack_offset;

        let mut dynamic_type_sizes = HashMap::with_capacity(f.dfg.dynamic_types.len());
        for (dyn_ty, _data) in f.dfg.dynamic_types.iter() {
            let ty = f
                .get_concrete_dynamic_ty(dyn_ty)
                .unwrap_or_else(|| panic!("invalid dynamic vector type: {}", dyn_ty));
            let size = isa.dynamic_vector_bytes(ty);
            dynamic_type_sizes.insert(ty, size);
        }

        // Figure out what instructions, if any, will be needed to check the
        // stack limit. This can either be specified as a special-purpose
        // argument or as a global value which often calculates the stack limit
        // from the arguments.
        let stack_limit =
            get_special_purpose_param_register(f, sigs, sig, ir::ArgumentPurpose::StackLimit)
                .map(|reg| (reg, smallvec![]))
                .or_else(|| {
                    f.stack_limit
                        .map(|gv| gen_stack_limit::<M>(f, sigs, sig, gv))
                });

        // Determine whether a probestack call is required for large enough
        // frames (and the minimum frame size if so).
        let probestack_min_frame = if flags.enable_probestack() {
            assert!(
                !flags.probestack_func_adjusts_sp(),
                "SP-adjusting probestack not supported in new backends"
            );
            Some(1 << flags.probestack_size_log2())
        } else {
            None
        };

        Ok(Self {
            ir_sig: ensure_struct_return_ptr_is_returned(&f.signature),
            sig,
            dynamic_stackslots,
            dynamic_type_sizes,
            sized_stackslots,
            stackslots_size,
            outgoing_args_size: 0,
            reg_args: vec![],
            clobbered: vec![],
            spillslots: None,
            fixed_frame_storage_size: 0,
            total_frame_size: None,
            ret_area_ptr: None,
            arg_temp_reg: vec![],
            call_conv,
            flags,
            isa_flags: isa_flags.clone(),
            is_leaf: f.is_leaf(),
            stack_limit,
            probestack_min_frame,
            setup_frame: true,
            _mach: PhantomData,
        })
    }

    /// Inserts instructions necessary for checking the stack limit into the
    /// prologue.
    ///
    /// This function will generate instructions necessary for performing a
    /// stack check at the header of a function. The stack check is intended to
    /// trap if the stack pointer goes below a particular threshold, preventing
    /// stack overflow in wasm or other code. The `stack_limit` argument here is
    /// the register which holds the threshold below which we're supposed to
    /// trap. This function is known to allocate `stack_size` bytes and we'll
    /// push instructions onto `insts`.
    ///
    /// Note that the instructions generated here are special because this is
    /// happening so late in the pipeline (e.g. after register allocation). This
    /// means that we need to do manual register allocation here and also be
    /// careful to not clobber any callee-saved or argument registers. For now
    /// this routine makes do with the `spilltmp_reg` as one temporary
    /// register, and a second register of `tmp2` which is caller-saved. This
    /// should be fine for us since no spills should happen in this sequence of
    /// instructions, so our register won't get accidentally clobbered.
    ///
    /// No values can be live after the prologue, but in this case that's ok
    /// because we just need to perform a stack check before progressing with
    /// the rest of the function.
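    ///
    /// Conceptually (illustrative pseudocode), for a nonzero `stack_size` the
    /// emitted sequence is:
    ///
    /// ```plain
    /// scratch = stack_limit + stack_size
    /// trap_if sp < scratch
    /// ```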
    fn insert_stack_check(
        &self,
        stack_limit: Reg,
        stack_size: u32,
        insts: &mut SmallInstVec<M::I>,
    ) {
        // With no explicit stack allocated we can just emit the simple check of
        // the stack registers against the stack limit register, and trap if
        // it's out of bounds.
        if stack_size == 0 {
            insts.extend(M::gen_stack_lower_bound_trap(stack_limit));
            return;
        }

        // Note that the 32k stack size here is pretty special. See the
        // documentation in x86/abi.rs for why this is here. The general idea is
        // that we're protecting against overflow in the addition that happens
        // below.
        if stack_size >= 32 * 1024 {
            insts.extend(M::gen_stack_lower_bound_trap(stack_limit));
        }

        // Add the `stack_size` to `stack_limit`, placing the result in
        // `scratch`.
        //
        // Note though that `stack_limit`'s register may be the same as
        // `scratch`. If our stack size doesn't fit into an immediate this
        // means we need a second scratch register for loading the stack size
        // into a register.
        let scratch = Writable::from_reg(M::get_stacklimit_reg());
        insts.extend(M::gen_add_imm(scratch, stack_limit, stack_size).into_iter());
        insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg()));
    }
}

/// Generates the instructions necessary for the `gv` to be materialized into a
/// register.
///
/// This function will return a register that will contain the result of
/// evaluating `gv`. It will also return any instructions necessary to calculate
/// the value of the register.
///
/// Note that global values are typically lowered to instructions via the
/// standard legalization pass. Unfortunately though prologue generation happens
/// so late in the pipeline that we can't use these legalization passes to
/// generate the instructions for `gv`. As a result we duplicate some lowering
/// of `gv` here and support only some global values. This is similar to what
/// the x86 backend does for now, and hopefully this can be somewhat cleaned up
/// in the future too!
///
/// Also note that this function will make use of `writable_spilltmp_reg()` as a
/// temporary register to store values in if necessary. Currently after we write
/// to this register there's guaranteed to be no spilled values between where
/// it's used, because we're not participating in register allocation anyway!
fn gen_stack_limit<M: ABIMachineSpec>(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    gv: ir::GlobalValue,
) -> (Reg, SmallInstVec<M::I>) {
    let mut insts = smallvec![];
    let reg = generate_gv::<M>(f, sigs, sig, gv, &mut insts);
    (reg, insts)
}

fn generate_gv<M: ABIMachineSpec>(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    gv: ir::GlobalValue,
    insts: &mut SmallInstVec<M::I>,
) -> Reg {
    match f.global_values[gv] {
        // Return the direct register the vmcontext is in.
        ir::GlobalValueData::VMContext => {
            get_special_purpose_param_register(f, sigs, sig, ir::ArgumentPurpose::VMContext)
                .expect("no vmcontext parameter found")
        }
        // Load our base value into a register, then load from that register
        // into a temporary register.
        ir::GlobalValueData::Load {
            base,
            offset,
            global_type: _,
            readonly: _,
        } => {
            let base = generate_gv::<M>(f, sigs, sig, base, insts);
            let into_reg = Writable::from_reg(M::get_stacklimit_reg());
            insts.push(M::gen_load_base_offset(
                into_reg,
                base,
                offset.into(),
                M::word_type(),
            ));
            into_reg.to_reg()
        }
        ref other => panic!("global value for stack limit not supported: {}", other),
    }
}

fn gen_load_stack_multi<M: ABIMachineSpec>(
    from: StackAMode,
    dst: ValueRegs<Writable<Reg>>,
    ty: Type,
) -> SmallInstVec<M::I> {
    let mut ret = smallvec![];
    let (_, tys) = M::I::rc_for_type(ty).unwrap();
    let mut offset = 0;
    // N.B.: registers are given in the `ValueRegs` in target endian order.
    for (&dst, &ty) in dst.regs().iter().zip(tys.iter()) {
        ret.push(M::gen_load_stack(from.offset(offset), dst, ty));
        offset += ty.bytes() as i64;
    }
    ret
}

fn gen_store_stack_multi<M: ABIMachineSpec>(
    from: StackAMode,
    src: ValueRegs<Reg>,
    ty: Type,
) -> SmallInstVec<M::I> {
    let mut ret = smallvec![];
    let (_, tys) = M::I::rc_for_type(ty).unwrap();
    let mut offset = 0;
    // N.B.: registers are given in the `ValueRegs` in target endian order.
    for (&src, &ty) in src.regs().iter().zip(tys.iter()) {
        ret.push(M::gen_store_stack(from.offset(offset), src, ty));
        offset += ty.bytes() as i64;
    }
    ret
}

/// If the signature needs to be legalized, then return the struct-return
/// parameter that should be prepended to its returns. Otherwise, return `None`.
fn missing_struct_return(sig: &ir::Signature) -> Option<ir::AbiParam> {
    let struct_ret_index = sig.special_param_index(ArgumentPurpose::StructReturn)?;
    if !sig.uses_special_return(ArgumentPurpose::StructReturn) {
        return Some(sig.params[struct_ret_index]);
    }

    None
}

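/// Ensure that a signature's struct-return pointer (if any) also appears
/// among its returns, prepending it if missing. For example, a hypothetical
/// signature `(sret i64, i32) -> ()` whose struct-return pointer is never
/// returned is legalized to `(sret i64, i32) -> (i64)`.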
fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature {
    let mut sig = sig.clone();
    if let Some(sret) = missing_struct_return(&sig) {
        sig.returns.insert(0, sret);
    }
    sig
}

/// ### Pre-Regalloc Functions
///
/// These methods of `Callee` may only be called before regalloc.
impl<M: ABIMachineSpec> Callee<M> {
    /// Access the (possibly legalized) signature.
    pub fn signature(&self) -> &ir::Signature {
        debug_assert!(
            missing_struct_return(&self.ir_sig).is_none(),
            "`Callee::ir_sig` is always legalized"
        );
        &self.ir_sig
    }

    /// Does the ABI-body code need temp registers (and if so, of what type)?
    /// They will be provided to `init()` as the `temps` arg if so.
    pub fn temps_needed(&self, sigs: &SigSet) -> Vec<Type> {
        let mut temp_tys = vec![];
        for arg in sigs.args(self.sig) {
            match arg {
                &ABIArg::ImplicitPtrArg { pointer, .. } => match &pointer {
                    &ABIArgSlot::Reg { .. } => {}
                    &ABIArgSlot::Stack { ty, .. } => {
                        temp_tys.push(ty);
                    }
                },
                _ => {}
            }
        }
        if sigs[self.sig].stack_ret_arg.is_some() {
            temp_tys.push(M::word_type());
        }
        temp_tys
    }

    /// Initialize. This is called after the Callee is constructed because it
    /// may be provided with a vector of temp vregs, which can only be allocated
    /// once the lowering context exists.
    pub fn init(&mut self, sigs: &SigSet, temps: Vec<Writable<Reg>>) {
        let mut temps_iter = temps.into_iter();
        for arg in sigs.args(self.sig) {
            let temp = match arg {
                &ABIArg::ImplicitPtrArg { pointer, .. } => match &pointer {
                    &ABIArgSlot::Reg { .. } => None,
                    &ABIArgSlot::Stack { .. } => Some(temps_iter.next().unwrap()),
                },
                _ => None,
            };
            self.arg_temp_reg.push(temp);
        }
        if sigs[self.sig].stack_ret_arg.is_some() {
            self.ret_area_ptr = Some(temps_iter.next().unwrap());
        }
    }

    /// Accumulate outgoing arguments.
    ///
    /// This ensures that at least `size` bytes are allocated in the prologue to
    /// be available for use in function calls to hold arguments and/or return
    /// values. If this function is called multiple times, the maximum of all
    /// `size` values will be available.
    pub fn accumulate_outgoing_args_size(&mut self, size: u32) {
        if size > self.outgoing_args_size {
            self.outgoing_args_size = size;
        }
    }

    /// Is forward-edge CFI enabled for this function?
    pub fn is_forward_edge_cfi_enabled(&self) -> bool {
        self.isa_flags.is_forward_edge_cfi_enabled()
    }

    /// Get the calling convention implemented by this ABI object.
    pub fn call_conv(&self, sigs: &SigSet) -> isa::CallConv {
        sigs[self.sig].call_conv
    }

    /// The offsets of all sized stack slots (not spill slots) for debuginfo purposes.
    pub fn sized_stackslot_offsets(&self) -> &PrimaryMap<StackSlot, u32> {
        &self.sized_stackslots
    }

    /// The offsets of all dynamic stack slots (not spill slots) for debuginfo purposes.
    pub fn dynamic_stackslot_offsets(&self) -> &PrimaryMap<DynamicStackSlot, u32> {
        &self.dynamic_stackslots
    }

    /// Generate instructions that copy an argument into its destination
    /// register(s).
    pub fn gen_copy_arg_to_regs(
        &mut self,
        sigs: &SigSet,
        idx: usize,
        into_regs: ValueRegs<Writable<Reg>>,
        vregs: &mut VRegAllocator<M::I>,
    ) -> SmallInstVec<M::I> {
        let mut insts = smallvec![];
        let mut copy_arg_slot_to_reg = |slot: &ABIArgSlot, into_reg: &Writable<Reg>| {
            match slot {
                &ABIArgSlot::Reg { reg, .. } => {
                    // Add a preg -> def pair to the eventual `args`
                    // instruction. Extension mode doesn't matter
                    // (we're copying out, not in; we ignore high bits
                    // by convention).
                    let arg = ArgPair {
                        vreg: *into_reg,
                        preg: reg.into(),
                    };
                    self.reg_args.push(arg);
                }
                &ABIArgSlot::Stack {
                    offset,
                    ty,
                    extension,
                    ..
                } => {
                    // However, we have to respect the extension mode for stack
                    // slots, or else we grab the wrong bytes on big-endian.
                    let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
                    let ty = match (ext, ty_bits(ty) as u32) {
                        (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
                            if n < M::word_bits() =>
                        {
                            M::word_type()
                        }
                        _ => ty,
                    };
                    insts.push(M::gen_load_stack(
                        StackAMode::FPOffset(
                            M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
                            ty,
                        ),
                        *into_reg,
                        ty,
                    ));
                }
            }
        };

        match &sigs.args(self.sig)[idx] {
            &ABIArg::Slots { ref slots, .. } => {
                assert_eq!(into_regs.len(), slots.len());
                for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
                    copy_arg_slot_to_reg(&slot, &into_reg);
                }
            }
            &ABIArg::StructArg {
                pointer, offset, ..
            } => {
                let into_reg = into_regs.only_reg().unwrap();
                if let Some(slot) = pointer {
                    // Buffer address is passed in a register or stack slot.
                    copy_arg_slot_to_reg(&slot, &into_reg);
                } else {
                    // Buffer address is implicitly defined by the ABI.
                    insts.push(M::gen_get_stack_addr(
                        StackAMode::FPOffset(
                            M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
                            I8,
                        ),
                        into_reg,
                        I8,
                    ));
                }
            }
            &ABIArg::ImplicitPtrArg { pointer, ty, .. } => {
                let into_reg = into_regs.only_reg().unwrap();
                // We need to dereference the pointer.
                let base = match &pointer {
                    &ABIArgSlot::Reg { reg, ty, .. } => {
                        let tmp = vregs.alloc(ty).unwrap().only_reg().unwrap();
                        self.reg_args.push(ArgPair {
                            vreg: Writable::from_reg(tmp),
                            preg: reg.into(),
                        });
                        tmp
                    }
                    &ABIArgSlot::Stack { offset, ty, .. } => {
                        // In this case we need a temp register to hold the address.
                        // This was allocated in the `init` routine.
                        let addr_reg = self.arg_temp_reg[idx].unwrap();
                        insts.push(M::gen_load_stack(
                            StackAMode::FPOffset(
                                M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
                                ty,
                            ),
                            addr_reg,
                            ty,
                        ));
                        addr_reg.to_reg()
                    }
                };
                insts.push(M::gen_load_base_offset(into_reg, base, 0, ty));
            }
        }
        insts
    }

    /// Is the given argument needed in the body (as opposed to, e.g., serving
    /// only as a special ABI-specific placeholder)? This controls whether
    /// lowering will copy it to a virtual reg for use by CLIF instructions.
    pub fn arg_is_needed_in_body(&self, _idx: usize) -> bool {
        true
    }
1524
1525 /// Generate an instruction which copies a source register to a return value slot.
1526 pub fn gen_copy_regs_to_retval(
1527 &self,
1528 sigs: &SigSet,
1529 idx: usize,
1530 from_regs: ValueRegs<Reg>,
1531 vregs: &mut VRegAllocator<M::I>,
1532 ) -> (SmallVec<[RetPair; 2]>, SmallInstVec<M::I>) {
1533 let mut reg_pairs = smallvec![];
1534 let mut ret = smallvec![];
1535 let word_bits = M::word_bits() as u8;
1536 match &sigs.rets(self.sig)[idx] {
1537 &ABIArg::Slots { ref slots, .. } => {
1538 assert_eq!(from_regs.len(), slots.len());
1539 for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) {
1540 match slot {
1541 &ABIArgSlot::Reg {
1542 reg, ty, extension, ..
1543 } => {
1544 let from_bits = ty_bits(ty) as u8;
1545 let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
1546 let vreg = match (ext, from_bits) {
1547 (ir::ArgumentExtension::Uext, n)
1548 | (ir::ArgumentExtension::Sext, n)
1549 if n < word_bits =>
1550 {
1551 let signed = ext == ir::ArgumentExtension::Sext;
1552 let dst = writable_value_regs(vregs.alloc(ty).unwrap())
1553 .only_reg()
1554 .unwrap();
1555 ret.push(M::gen_extend(
1556 dst, from_reg, signed, from_bits,
1557 /* to_bits = */ word_bits,
1558 ));
1559 dst.to_reg()
1560 }
1561 _ => {
1562 // No move needed, regalloc2 will emit it using the constraint
1563 // added by the RetPair.
1564 from_reg
1565 }
1566 };
1567 reg_pairs.push(RetPair {
1568 vreg,
1569 preg: Reg::from(reg),
1570 });
1571 }
1572 &ABIArgSlot::Stack {
1573 offset,
1574 ty,
1575 extension,
1576 ..
1577 } => {
1578 let mut ty = ty;
1579 let from_bits = ty_bits(ty) as u8;
1580 // A machine ABI implementation should ensure that stack frames
1581 // have "reasonable" size. All current ABIs for machinst
1582 // backends (aarch64 and x64) enforce a 128MB limit.
1583 let off = i32::try_from(offset).expect(
1584 "Argument stack offset greater than 2GB; should hit impl limit first",
1585 );
1586 let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
1587 // If extension is required, widen into a fresh temp and store that.
1588 match (ext, from_bits) {
1589 (ir::ArgumentExtension::Uext, n)
1590 | (ir::ArgumentExtension::Sext, n)
1591 if n < word_bits =>
1592 {
1593 assert_eq!(M::word_reg_class(), from_reg.class());
1594 let signed = ext == ir::ArgumentExtension::Sext;
1595 let dst = writable_value_regs(vregs.alloc(ty).unwrap())
1596 .only_reg()
1597 .unwrap();
1598 ret.push(M::gen_extend(
1599 dst, from_reg, signed, from_bits,
1600 /* to_bits = */ word_bits,
1601 ));
1602 // Store the extended version.
1603 ty = M::word_type();
1604 }
1605 _ => {}
1606 };
1607 ret.push(M::gen_store_base_offset(
1608 self.ret_area_ptr.unwrap().to_reg(),
1609 off,
1610 from_reg,
1611 ty,
1612 ));
1613 }
1614 }
1615 }
1616 }
1617 ABIArg::StructArg { .. } => {
1618 panic!("StructArg in return position is unsupported");
1619 }
1620 ABIArg::ImplicitPtrArg { .. } => {
1621 panic!("ImplicitPtrArg in return position is unsupported");
1622 }
1623 }
1624 (reg_pairs, ret)
1625 }
1626
1627 /// Generate any setup instruction needed to save values to the
1628 /// return-value area. This is usually used when there are multiple return
1629 /// values or an otherwise large return value that must be passed on the
1630 /// stack; typically the ABI specifies an extra hidden argument that is a
1631 /// pointer to that memory.
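///
/// As a rough analogy (hypothetical illustration, not the backend's
/// actual lowering): with only two return registers available, a
/// four-value return travels partly through a caller-provided buffer
/// whose address is the hidden extra argument.
///
/// ```
/// fn callee(ret_area: &mut [u64; 2]) -> (u64, u64) {
///     ret_area[0] = 3; // third return value, via the hidden pointer
///     ret_area[1] = 4; // fourth return value
///     (1, 2)           // first two travel in registers
/// }
/// let mut area = [0u64; 2];
/// let (a, b) = callee(&mut area);
/// assert_eq!((a, b, area[0], area[1]), (1, 2, 3, 4));
/// ```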
1632 pub fn gen_retval_area_setup(
1633 &mut self,
1634 sigs: &SigSet,
1635 vregs: &mut VRegAllocator<M::I>,
1636 ) -> Option<M::I> {
1637 if let Some(i) = sigs[self.sig].stack_ret_arg {
1638 let insts = self.gen_copy_arg_to_regs(
1639 sigs,
1640 i.into(),
1641 ValueRegs::one(self.ret_area_ptr.unwrap()),
1642 vregs,
1643 );
1644 insts.into_iter().next().map(|inst| {
1645 trace!(
1646 "gen_retval_area_setup: inst {:?}; ptr reg is {:?}",
1647 inst,
1648 self.ret_area_ptr.unwrap().to_reg()
1649 );
1650 inst
1651 })
1652 } else {
1653 trace!("gen_retval_area_setup: not needed");
1654 None
1655 }
1656 }
1657
1658 /// Generate a return instruction.
1659 pub fn gen_ret(&self, rets: Vec<RetPair>) -> M::I {
1660 M::gen_ret(self.setup_frame, &self.isa_flags, rets)
1661 }
1662
1663 /// Produce an instruction that computes a sized stackslot address.
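///
/// The address is computed as a positive offset from nominal SP:
/// `slot base + offset`. With illustrative numbers (a slot whose area
/// begins 32 bytes above nominal SP, accessed at offset 8):
///
/// ```
/// let stack_off = 32i64; // assumed base of this stackslot
/// let offset = 8u32;     // caller-requested offset into the slot
/// assert_eq!(stack_off + offset as i64, 40); // nominal-SP offset
/// ```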
1664 pub fn sized_stackslot_addr(
1665 &self,
1666 slot: StackSlot,
1667 offset: u32,
1668 into_reg: Writable<Reg>,
1669 ) -> M::I {
1670 // Offset from beginning of stackslot area, which is at nominal SP (see
1671 // [StackAMode::NominalSPOffset] for more details on nominal SP tracking).
1672 let stack_off = self.sized_stackslots[slot] as i64;
1673 let sp_off: i64 = stack_off + (offset as i64);
1674 M::gen_get_stack_addr(StackAMode::NominalSPOffset(sp_off, I8), into_reg, I8)
1675 }
1676
1677 /// Produce an instruction that computes a dynamic stackslot address.
1678 pub fn dynamic_stackslot_addr(&self, slot: DynamicStackSlot, into_reg: Writable<Reg>) -> M::I {
1679 let stack_off = self.dynamic_stackslots[slot] as i64;
1680 M::gen_get_stack_addr(
1681 StackAMode::NominalSPOffset(stack_off, I64X2XN),
1682 into_reg,
1683 I64X2XN,
1684 )
1685 }
1686
1687 /// Load from a spillslot.
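///
/// Spill slots live immediately above the sized stackslots, so the
/// nominal-SP offset is `stackslots_size + index * word_bytes`. With
/// illustrative numbers (48 bytes of stackslots, 64-bit words):
///
/// ```
/// let (stackslots_size, word_bytes, index) = (48i64, 8i64, 3i64);
/// assert_eq!(stackslots_size + index * word_bytes, 72);
/// ```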
1688 pub fn load_spillslot(
1689 &self,
1690 slot: SpillSlot,
1691 ty: Type,
1692 into_regs: ValueRegs<Writable<Reg>>,
1693 ) -> SmallInstVec<M::I> {
1694 // Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
1695 let islot = slot.index() as i64;
1696 let spill_off = islot * M::word_bytes() as i64;
1697 let sp_off = self.stackslots_size as i64 + spill_off;
1698 trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
1699
1700 gen_load_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), into_regs, ty)
1701 }
1702
1703 /// Store to a spillslot.
1704 pub fn store_spillslot(
1705 &self,
1706 slot: SpillSlot,
1707 ty: Type,
1708 from_regs: ValueRegs<Reg>,
1709 ) -> SmallInstVec<M::I> {
1710 // Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
1711 let islot = slot.index() as i64;
1712 let spill_off = islot * M::word_bytes() as i64;
1713 let sp_off = self.stackslots_size as i64 + spill_off;
1714 trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
1715
1716 gen_store_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), from_regs, ty)
1717 }
1718
1719 /// Get an `args` pseudo-inst, if any, that should appear at the
1720 /// very top of the function body prior to regalloc.
1721 pub fn take_args(&mut self) -> Option<M::I> {
1722 if !self.reg_args.is_empty() {
1723 // Very first instruction is an `args` pseudo-inst that
1724 // establishes live-ranges for in-register arguments and
1725 // constrains them at the start of the function to the
1726 // locations defined by the ABI.
1727 Some(M::gen_args(
1728 &self.isa_flags,
1729 std::mem::take(&mut self.reg_args),
1730 ))
1731 } else {
1732 None
1733 }
1734 }
1735}
1736
1737/// ### Post-Regalloc Functions
1738///
1739/// These methods of `Callee` may only be called after
1740/// regalloc.
1741impl<M: ABIMachineSpec> Callee<M> {
1742 /// Update with the number of spillslots, post-regalloc.
1743 pub fn set_num_spillslots(&mut self, slots: usize) {
1744 self.spillslots = Some(slots);
1745 }
1746
1747 /// Update with the clobbered registers, post-regalloc.
1748 pub fn set_clobbered(&mut self, clobbered: Vec<Writable<RealReg>>) {
1749 self.clobbered = clobbered;
1750 }
1751
1752 /// Generate a stack map, given a list of spillslots and the emission state
1753 /// at a given program point (prior to emission of the safepointing
1754 /// instruction).
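///
/// The map is a bitmap over the frame's words; spill slots begin at
/// word `(stackslots_size + virtual_sp_offset) / word_bytes`. A worked
/// example with made-up numbers:
///
/// ```
/// let (map_size, bytes) = (40u32, 8u32);
/// let map_words = (map_size + bytes - 1) / bytes; // round up to words
/// let mut bits = vec![false; map_words as usize];
/// let first_spillslot_word = (16u32 / bytes) as usize; // stackslots_size = 16
/// bits[first_spillslot_word + 1] = true; // spillslot 1 holds a reference
/// assert_eq!(bits, [false, false, false, true, false]);
/// ```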
1755 pub fn spillslots_to_stack_map(
1756 &self,
1757 slots: &[SpillSlot],
1758 state: &<M::I as MachInstEmit>::State,
1759 ) -> StackMap {
1760 let virtual_sp_offset = M::get_virtual_sp_offset_from_state(state);
1761 let nominal_sp_to_fp = M::get_nominal_sp_to_fp(state);
1762 assert!(virtual_sp_offset >= 0);
1763 trace!(
1764 "spillslots_to_stackmap: slots = {:?}, state = {:?}",
1765 slots,
1766 state
1767 );
1768 let map_size = (virtual_sp_offset + nominal_sp_to_fp) as u32;
1769 let bytes = M::word_bytes();
1770 let map_words = (map_size + bytes - 1) / bytes;
1771 let mut bits = std::iter::repeat(false)
1772 .take(map_words as usize)
1773 .collect::<Vec<bool>>();
1774
1775 let first_spillslot_word =
1776 ((self.stackslots_size + virtual_sp_offset as u32) / bytes) as usize;
1777 for &slot in slots {
1778 let slot = slot.index();
1779 bits[first_spillslot_word + slot] = true;
1780 }
1781
1782 StackMap::from_slice(&bits[..])
1783 }
1784
1785 /// Generate a prologue, post-regalloc.
1786 ///
1787 /// This should include any stack frame or other setup necessary to use the
1788 /// other methods (`load_arg`, `store_retval`, and spillslot accesses).
1789 /// `self` is mutable so that we can store information in it which will be
1790 /// useful when creating the epilogue.
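///
/// The raw frame size is rounded up to the ABI's stack alignment with
/// the usual mask trick; e.g. for a 16-byte alignment requirement:
///
/// ```
/// let mask = 16u32 - 1;
/// assert_eq!((40 + mask) & !mask, 48); // 40 bytes rounds up to 48
/// assert_eq!((48 + mask) & !mask, 48); // already aligned: unchanged
/// ```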
1791 pub fn gen_prologue(&mut self, sigs: &SigSet) -> SmallInstVec<M::I> {
1792 let bytes = M::word_bytes();
1793 let total_stacksize = self.stackslots_size + bytes * self.spillslots.unwrap() as u32;
1794 let mask = M::stack_align(self.call_conv) - 1;
1795 let total_stacksize = (total_stacksize + mask) & !mask; // Round up to the ABI's stack alignment.
1796 let clobbered_callee_saves = M::get_clobbered_callee_saves(
1797 self.call_conv,
1798 &self.flags,
1799 self.signature(),
1800 &self.clobbered,
1801 );
1802 let mut insts = smallvec![];
1803
1804 self.fixed_frame_storage_size += total_stacksize;
1805 self.setup_frame = self.flags.preserve_frame_pointers()
1806 || M::is_frame_setup_needed(
1807 self.is_leaf,
1808 self.stack_args_size(sigs),
1809 clobbered_callee_saves.len(),
1810 self.fixed_frame_storage_size,
1811 );
1812
1813 insts.extend(
1814 M::gen_prologue_start(
1815 self.setup_frame,
1816 self.call_conv,
1817 &self.flags,
1818 &self.isa_flags,
1819 )
1820 .into_iter(),
1821 );
1822
1823 if self.setup_frame {
1824 // set up frame
1825 insts.extend(M::gen_prologue_frame_setup(&self.flags).into_iter());
1826 }
1827
1828 // Leaf functions that use no stack space can skip the stack
1829 // check; otherwise, insert one whenever a stack limit is specified.
1830 if total_stacksize > 0 || !self.is_leaf {
1831 if let Some((reg, stack_limit_load)) = &self.stack_limit {
1832 insts.extend(stack_limit_load.clone());
1833 self.insert_stack_check(*reg, total_stacksize, &mut insts);
1834 }
1835
1836 let needs_probestack = self
1837 .probestack_min_frame
1838 .map_or(false, |min_frame| total_stacksize >= min_frame);
1839
1840 if needs_probestack {
1841 match self.flags.probestack_strategy() {
1842 ProbestackStrategy::Inline => {
1843 let guard_size = 1 << self.flags.probestack_size_log2();
1844 M::gen_inline_probestack(&mut insts, total_stacksize, guard_size)
1845 }
1846 ProbestackStrategy::Outline => M::gen_probestack(&mut insts, total_stacksize),
1847 }
1848 }
1849 }
1850
1851 // Save clobbered registers.
1852 let (clobber_size, clobber_insts) = M::gen_clobber_save(
1853 self.call_conv,
1854 self.setup_frame,
1855 &self.flags,
1856 &clobbered_callee_saves,
1857 self.fixed_frame_storage_size,
1858 self.outgoing_args_size,
1859 );
1860 insts.extend(clobber_insts);
1861
1862 // N.B.: "nominal SP", which we use to refer to stackslots and
1863 // spillslots, is defined to be equal to the stack pointer at this point
1864 // in the prologue.
1865 //
1866 // If we push any further data onto the stack in the function
1867 // body, we emit a virtual-SP adjustment meta-instruction so
1868 // that the nominal SP references behave as if SP were still
1869 // at this point. See documentation for
1870 // [crate::machinst::abi](this module) for more details
1871 // on stackframe layout and nominal SP maintenance.
1872
1873 self.total_frame_size = Some(total_stacksize + clobber_size as u32);
1874 insts
1875 }
1876
1877 /// Generate an epilogue, post-regalloc.
1878 ///
1879 /// Note that this must generate the actual return instruction (rather than
1880 /// emitting this in the lowering logic), because the epilogue code comes
1881 /// before the return and the two are likely closely related.
1882 pub fn gen_epilogue(&self) -> SmallInstVec<M::I> {
1883 let mut insts = smallvec![];
1884
1885 // Restore clobbered registers.
1886 insts.extend(M::gen_clobber_restore(
1887 self.call_conv,
1888 self.signature(),
1889 &self.flags,
1890 &self.clobbered,
1891 self.fixed_frame_storage_size,
1892 self.outgoing_args_size,
1893 ));
1894
1895 // N.B.: we do *not* emit a nominal SP adjustment here, because (i) there will be no
1896 // references to nominal SP offsets before the return below, and (ii) the instruction
1897 // emission tracks running SP offset linearly (in straight-line order), not according to
1898 // the CFG, so early returns in the middle of function bodies would cause an incorrect
1899 // offset for the rest of the body.
1900
1901 if self.setup_frame {
1902 insts.extend(M::gen_epilogue_frame_restore(&self.flags));
1903 }
1904
1905 // This `ret` doesn't need any return registers attached
1906 // because we are post-regalloc and don't need to
1907 // represent the implicit uses anymore.
1908 insts.push(M::gen_ret(self.setup_frame, &self.isa_flags, vec![]));
1909
1910 trace!("Epilogue: {:?}", insts);
1911 insts
1912 }
1913
1914 /// Returns the full frame size for the given function, after prologue
1915 /// emission has run. This comprises the spill slots, stack-storage slots,
1916 /// and storage for clobbered callee-save registers (but not arguments
1917 /// pushed at callsites within this function, or other ephemeral pushes).
1918 pub fn frame_size(&self) -> u32 {
1919 self.total_frame_size
1920 .expect("frame size not computed before prologue generation")
1921 }
1922
1923 /// Returns the size of arguments expected on the stack.
1924 pub fn stack_args_size(&self, sigs: &SigSet) -> u32 {
1925 sigs[self.sig].sized_stack_arg_space
1926 }
1927
1928 /// Get the spill-slot size.
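///
/// When dynamic types are in use, the slot size is chosen for the
/// largest of them; a plain-Rust sketch of that selection (illustrative
/// data, not the backend's types):
///
/// ```
/// let dynamic_type_sizes = [("dt0", 16u32), ("dt1", 32u32)];
/// let max = dynamic_type_sizes.iter().map(|&(_k, v)| v).max().unwrap_or(16);
/// assert_eq!(max, 32);
/// ```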
1929 pub fn get_spillslot_size(&self, rc: RegClass) -> u32 {
1930 let max = if self.dynamic_type_sizes.is_empty() {
1931 16
1932 } else {
1933 *self
1934 .dynamic_type_sizes
1935 .iter()
1936 .max_by(|x, y| x.1.cmp(&y.1))
1937 .map(|(_k, v)| v)
1938 .unwrap()
1939 };
1940 M::get_number_of_spillslots_for_value(rc, max)
1941 }
1942
1943 /// Generate a spill.
1944 pub fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> M::I {
1945 let ty = M::I::canonical_type_for_rc(Reg::from(from_reg).class());
1946 self.store_spillslot(to_slot, ty, ValueRegs::one(Reg::from(from_reg)))
1947 .into_iter()
1948 .next()
1949 .unwrap()
1950 }
1951
1952 /// Generate a reload (fill).
1953 pub fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> M::I {
1954 let ty = M::I::canonical_type_for_rc(to_reg.to_reg().class());
1955 self.load_spillslot(
1956 from_slot,
1957 ty,
1958 writable_value_regs(ValueRegs::one(Reg::from(to_reg.to_reg()))),
1959 )
1960 .into_iter()
1961 .next()
1962 .unwrap()
1963 }
1964}
1965
1966/// An input argument to a call instruction: the vreg that is used,
1967/// and the preg it is constrained to (per the ABI).
1968#[derive(Clone, Debug)]
1969pub struct CallArgPair {
1970 /// The virtual register to use for the argument.
1971 pub vreg: Reg,
1972 /// The real register into which the arg goes.
1973 pub preg: Reg,
1974}
1975
1976/// An output return value from a call instruction: the vreg that is
1977/// defined, and the preg it is constrained to (per the ABI).
1978#[derive(Clone, Debug)]
1979pub struct CallRetPair {
1980 /// The virtual register to define from this return value.
1981 pub vreg: Writable<Reg>,
1982 /// The real register from which the return value is read.
1983 pub preg: Reg,
1984}
1985
1986pub type CallArgList = SmallVec<[CallArgPair; 8]>;
1987pub type CallRetList = SmallVec<[CallRetPair; 8]>;
1988
1989/// ABI object for a callsite.
1990pub struct Caller<M: ABIMachineSpec> {
1991 /// The called function's signature.
1992 sig: Sig,
1993 /// All register uses for the callsite, i.e., function args, with
1994 /// VReg and the physical register it is constrained to.
1995 uses: CallArgList,
1996 /// All defs for the callsite, i.e., return values.
1997 defs: CallRetList,
1998 /// Caller-save clobbers.
1999 clobbers: PRegSet,
2000 /// Call destination.
2001 dest: CallDest,
2002 /// Actual call opcode; used to distinguish various types of calls.
2003 opcode: ir::Opcode,
2004 /// Caller's calling convention.
2005 caller_conv: isa::CallConv,
2006 /// The settings controlling this compilation.
2007 flags: settings::Flags,
2008
2009 _mach: PhantomData<M>,
2010}
2011
2012/// Destination for a call.
2013#[derive(Debug, Clone)]
2014pub enum CallDest {
2015 /// Call to an ExtName (named function symbol).
2016 ExtName(ir::ExternalName, RelocDistance),
2017 /// Indirect call to a function pointer in a register.
2018 Reg(Reg),
2019}
2020
2021impl<M: ABIMachineSpec> Caller<M> {
2022 /// Create a callsite ABI object for a call directly to the specified function.
2023 pub fn from_func(
2024 sigs: &SigSet,
2025 sig_ref: ir::SigRef,
2026 extname: &ir::ExternalName,
2027 dist: RelocDistance,
2028 caller_conv: isa::CallConv,
2029 flags: settings::Flags,
2030 ) -> CodegenResult<Caller<M>> {
2031 let sig = sigs.abi_sig_for_sig_ref(sig_ref);
2032 let clobbers = sigs.call_clobbers::<M>(sig);
2033 Ok(Caller {
2034 sig,
2035 uses: smallvec![],
2036 defs: smallvec![],
2037 clobbers,
2038 dest: CallDest::ExtName(extname.clone(), dist),
2039 opcode: ir::Opcode::Call,
2040 caller_conv,
2041 flags,
2042 _mach: PhantomData,
2043 })
2044 }
2045
2046 /// Create a callsite ABI object for a call directly to the specified
2047 /// libcall.
2048 pub fn from_libcall(
2049 sigs: &SigSet,
2050 sig: &ir::Signature,
2051 extname: &ir::ExternalName,
2052 dist: RelocDistance,
2053 caller_conv: isa::CallConv,
2054 flags: settings::Flags,
2055 ) -> CodegenResult<Caller<M>> {
2056 let sig = sigs.abi_sig_for_signature(sig);
2057 let clobbers = sigs.call_clobbers::<M>(sig);
2058 Ok(Caller {
2059 sig,
2060 uses: smallvec![],
2061 defs: smallvec![],
2062 clobbers,
2063 dest: CallDest::ExtName(extname.clone(), dist),
2064 opcode: ir::Opcode::Call,
2065 caller_conv,
2066 flags,
2067 _mach: PhantomData,
2068 })
2069 }
2070
2071 /// Create a callsite ABI object for a call to a function pointer with the
2072 /// given signature.
2073 pub fn from_ptr(
2074 sigs: &SigSet,
2075 sig_ref: ir::SigRef,
2076 ptr: Reg,
2077 opcode: ir::Opcode,
2078 caller_conv: isa::CallConv,
2079 flags: settings::Flags,
2080 ) -> CodegenResult<Caller<M>> {
2081 let sig = sigs.abi_sig_for_sig_ref(sig_ref);
2082 let clobbers = sigs.call_clobbers::<M>(sig);
2083 Ok(Caller {
2084 sig,
2085 uses: smallvec![],
2086 defs: smallvec![],
2087 clobbers,
2088 dest: CallDest::Reg(ptr),
2089 opcode,
2090 caller_conv,
2091 flags,
2092 _mach: PhantomData,
2093 })
2094 }
2095}
2096
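/// Adjust SP by `off` bytes (a subtraction when `is_sub` is set) and
/// emit the matching nominal-SP adjustment pseudo-instruction, so that
/// nominal-SP-relative references remain valid across the adjustment.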
2097fn adjust_stack_and_nominal_sp<M: ABIMachineSpec>(ctx: &mut Lower<M::I>, off: i32, is_sub: bool) {
2098 if off == 0 {
2099 return;
2100 }
2101 let amt = if is_sub { -off } else { off };
2102 for inst in M::gen_sp_reg_adjust(amt) {
2103 ctx.emit(inst);
2104 }
2105 ctx.emit(M::gen_nominal_sp_adj(-amt));
2106}
2107
2108impl<M: ABIMachineSpec> Caller<M> {
2109 /// Get the number of arguments expected.
2110 pub fn num_args(&self, sigs: &SigSet) -> usize {
2111 sigs.num_args(self.sig)
2112 }
2113
2114 /// Emit code to pre-adjust the stack, prior to argument copies and call.
2115 pub fn emit_stack_pre_adjust(&self, ctx: &mut Lower<M::I>) {
2116 let off =
2117 ctx.sigs()[self.sig].sized_stack_arg_space + ctx.sigs()[self.sig].sized_stack_ret_space;
2118 adjust_stack_and_nominal_sp::<M>(ctx, off as i32, /* is_sub = */ true)
2119 }
2120
2121 /// Emit code to post-adjust the stack, after call return and return-value copies.
2122 pub fn emit_stack_post_adjust(&self, ctx: &mut Lower<M::I>) {
2123 let off =
2124 ctx.sigs()[self.sig].sized_stack_arg_space + ctx.sigs()[self.sig].sized_stack_ret_space;
2125 adjust_stack_and_nominal_sp::<M>(ctx, off as i32, /* is_sub = */ false)
2126 }
2127
2128 /// Emit a copy of a large argument into its associated stack buffer, if any.
2129 /// We must be careful to perform all these copies (as necessary) before setting
2130 /// up the argument registers, since we may have to invoke memcpy(), which could
2131 /// clobber any registers already set up. The back-end should call this routine
2132 /// for all arguments before calling `gen_arg` for any of them.
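///
/// A hypothetical driver loop (names illustrative; the real sequencing
/// lives in the lowering code):
///
/// ```ignore
/// let n = caller.num_args(ctx.sigs());
/// for i in 0..n {
///     caller.emit_copy_regs_to_buffer(ctx, i, arg_regs[i]); // buffers first
/// }
/// for i in 0..n {
///     for inst in caller.gen_arg(ctx, i, arg_regs[i]) {
///         ctx.emit(inst); // then constrain/copy the argument registers
///     }
/// }
/// ```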
2133 pub fn emit_copy_regs_to_buffer(
2134 &self,
2135 ctx: &mut Lower<M::I>,
2136 idx: usize,
2137 from_regs: ValueRegs<Reg>,
2138 ) {
2139 match &ctx.sigs().args(self.sig)[idx] {
2140 &ABIArg::Slots { .. } => {}
2141 &ABIArg::StructArg { offset, size, .. } => {
2142 let src_ptr = from_regs.only_reg().unwrap();
2143 let dst_ptr = ctx.alloc_tmp(M::word_type()).only_reg().unwrap();
2144 ctx.emit(M::gen_get_stack_addr(
2145 StackAMode::SPOffset(offset, I8),
2146 dst_ptr,
2147 I8,
2148 ));
2149 // Emit a memcpy from `src_ptr` to `dst_ptr` of `size` bytes.
2150 // N.B.: because we process StructArg params *first*, this is
2151 // safe w.r.t. clobbers: we have not yet filled in any other
2152 // arg regs.
2153 let memcpy_call_conv =
2154 isa::CallConv::for_libcall(&self.flags, ctx.sigs()[self.sig].call_conv);
2155 for insn in M::gen_memcpy(
2156 memcpy_call_conv,
2157 dst_ptr.to_reg(),
2158 src_ptr,
2159 size as usize,
2160 |ty| ctx.alloc_tmp(ty).only_reg().unwrap(),
2161 )
2162 .into_iter()
2163 {
2164 ctx.emit(insn);
2165 }
2166 }
2167 &ABIArg::ImplicitPtrArg { .. } => unimplemented!(), // Only supported via ISLE.
2168 }
2169 }
2170
2171 /// Add a constraint for an argument value from a source register.
2172 /// For large arguments with associated stack buffer, this may
2173 /// load the address of the buffer into the argument register, if
2174 /// required by the ABI.
2175 pub fn gen_arg(
2176 &mut self,
2177 ctx: &mut Lower<M::I>,
2178 idx: usize,
2179 from_regs: ValueRegs<Reg>,
2180 ) -> SmallInstVec<M::I> {
2181 let mut insts = smallvec![];
2182 let word_rc = M::word_reg_class();
2183 let word_bits = M::word_bits() as usize;
2184
2185 // How many temps do we need for extends? Allocate them ahead
2186 // of time, since we can't do it while we're iterating over
2187 // the sig and immutably borrowing `ctx`.
2188 let needed_tmps = match &ctx.sigs().args(self.sig)[idx] {
2189 &ABIArg::Slots { ref slots, .. } => slots
2190 .iter()
2191 .map(|slot| match slot {
2192 &ABIArgSlot::Reg { extension, .. }
2193 if extension != ir::ArgumentExtension::None =>
2194 {
2195 1
2196 }
2197 &ABIArgSlot::Reg { ty, .. } if ty.is_ref() => 1,
2198 &ABIArgSlot::Reg { .. } => 0,
2199 &ABIArgSlot::Stack { extension, .. }
2200 if extension != ir::ArgumentExtension::None =>
2201 {
2202 1
2203 }
2204 &ABIArgSlot::Stack { .. } => 0,
2205 })
2206 .sum(),
2207 _ => 0,
2208 };
2209 let mut temps: SmallVec<[Writable<Reg>; 16]> = (0..needed_tmps)
2210 .map(|_| ctx.alloc_tmp(M::word_type()).only_reg().unwrap())
2211 .collect();
2212
2213 match &ctx.sigs().args(self.sig)[idx] {
2214 &ABIArg::Slots { ref slots, .. } => {
2215 assert_eq!(from_regs.len(), slots.len());
2216 for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
2217 match slot {
2218 &ABIArgSlot::Reg {
2219 reg, ty, extension, ..
2220 } => {
2221 let ext = M::get_ext_mode(ctx.sigs()[self.sig].call_conv, extension);
2222 if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
2223 assert_eq!(word_rc, reg.class());
2224 let signed = match ext {
2225 ir::ArgumentExtension::Uext => false,
2226 ir::ArgumentExtension::Sext => true,
2227 _ => unreachable!(),
2228 };
2229 let extend_result =
2230 temps.pop().expect("Must have allocated enough temps");
2231 insts.push(M::gen_extend(
2232 extend_result,
2233 *from_reg,
2234 signed,
2235 ty_bits(ty) as u8,
2236 word_bits as u8,
2237 ));
2238 self.uses.push(CallArgPair {
2239 vreg: extend_result.to_reg(),
2240 preg: reg.into(),
2241 });
2242 } else if ty.is_ref() {
2243 // Reference-typed args need to be
2244 // passed as a copy; the original vreg
2245 // is constrained to the stack and
2246 // this copy is in a reg.
2247 let ref_copy =
2248 temps.pop().expect("Must have allocated enough temps");
2249 insts.push(M::gen_move(ref_copy, *from_reg, M::word_type()));
2250 self.uses.push(CallArgPair {
2251 vreg: ref_copy.to_reg(),
2252 preg: reg.into(),
2253 });
2254 } else {
2255 self.uses.push(CallArgPair {
2256 vreg: *from_reg,
2257 preg: reg.into(),
2258 });
2259 }
2260 }
2261 &ABIArgSlot::Stack {
2262 offset,
2263 ty,
2264 extension,
2265 ..
2266 } => {
2267 let ext = M::get_ext_mode(ctx.sigs()[self.sig].call_conv, extension);
2268 let (data, ty) =
2269 if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
2270 assert_eq!(word_rc, from_reg.class());
2271 let signed = match ext {
2272 ir::ArgumentExtension::Uext => false,
2273 ir::ArgumentExtension::Sext => true,
2274 _ => unreachable!(),
2275 };
2276 let extend_result =
2277 temps.pop().expect("Must have allocated enough temps");
2278 insts.push(M::gen_extend(
2279 extend_result,
2280 *from_reg,
2281 signed,
2282 ty_bits(ty) as u8,
2283 word_bits as u8,
2284 ));
2285 // Store the extended version.
2286 (extend_result.to_reg(), M::word_type())
2287 } else {
2288 (*from_reg, ty)
2289 };
2290 insts.push(M::gen_store_stack(
2291 StackAMode::SPOffset(offset, ty),
2292 data,
2293 ty,
2294 ));
2295 }
2296 }
2297 }
2298 }
2299 &ABIArg::StructArg { pointer, .. } => {
2300 assert!(pointer.is_none()); // Only supported via ISLE.
2301 }
2302 &ABIArg::ImplicitPtrArg { .. } => unimplemented!(), // Only supported via ISLE.
2303 }
2304 insts
2305 }
2306
2307 /// Define a return value after the call returns.
2308 pub fn gen_retval(
2309 &mut self,
2310 ctx: &Lower<M::I>,
2311 idx: usize,
2312 into_regs: ValueRegs<Writable<Reg>>,
2313 ) -> SmallInstVec<M::I> {
2314 let mut insts = smallvec![];
2315 match &ctx.sigs().rets(self.sig)[idx] {
2316 &ABIArg::Slots { ref slots, .. } => {
2317 assert_eq!(into_regs.len(), slots.len());
2318 for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
2319 match slot {
2320 // Extension mode doesn't matter because we're copying out, not in,
2321 // and we ignore high bits in our own registers by convention.
2322 &ABIArgSlot::Reg { reg, .. } => {
2323 self.defs.push(CallRetPair {
2324 vreg: *into_reg,
2325 preg: reg.into(),
2326 });
2327 }
2328 &ABIArgSlot::Stack { offset, ty, .. } => {
2329 let ret_area_base = ctx.sigs()[self.sig].sized_stack_arg_space();
2330 insts.push(M::gen_load_stack(
2331 StackAMode::SPOffset(offset + ret_area_base, ty),
2332 *into_reg,
2333 ty,
2334 ));
2335 }
2336 }
2337 }
2338 }
2339 &ABIArg::StructArg { .. } => {
2340 panic!("StructArg not supported in return position");
2341 }
2342 &ABIArg::ImplicitPtrArg { .. } => {
2343 panic!("ImplicitPtrArg not supported in return position");
2344 }
2345 }
2346 insts
2347 }
2348
2349 /// Emit the call itself.
2350 ///
2351 /// The returned instruction should have proper use- and def-sets according
2352 /// to the argument registers, return-value registers, and clobbered
2353 /// registers for this function signature in this ABI.
2354 ///
2355 /// (Arg registers are uses, and retval registers are defs. Clobbered
2356 /// registers are also logically defs, but should never be read; their
2357 /// values are "defined" (to the regalloc) but "undefined" in every other
2358 /// sense.)
2359 ///
2360 /// This function should only be called once, as it is allowed to re-use
2361 /// parts of the `Caller` object in emitting instructions.
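///
/// For orientation, a sketch of the overall callsite protocol built
/// from this object's methods (hypothetical ordering; the real sequence
/// is driven by the lowering code):
///
/// ```ignore
/// caller.emit_stack_pre_adjust(ctx);          // reserve arg/ret space
/// // emit_copy_regs_to_buffer + gen_arg for each argument ...
/// caller.emit_call(ctx);                      // the call itself
/// // gen_retval for each return value ...
/// caller.emit_stack_post_adjust(ctx);         // release arg/ret space
/// ```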
2362 pub fn emit_call(&mut self, ctx: &mut Lower<M::I>) {
2363 let word_type = M::word_type();
2364 if let Some(i) = ctx.sigs()[self.sig].stack_ret_arg {
2365 let rd = ctx.alloc_tmp(word_type).only_reg().unwrap();
2366 let ret_area_base = ctx.sigs()[self.sig].sized_stack_arg_space();
2367 ctx.emit(M::gen_get_stack_addr(
2368 StackAMode::SPOffset(ret_area_base, I8),
2369 rd,
2370 I8,
2371 ));
2372 for inst in self.gen_arg(ctx, i.into(), ValueRegs::one(rd.to_reg())) {
2373 ctx.emit(inst);
2374 }
2375 }
2376
2377 let (uses, defs) = (
2378 mem::replace(&mut self.uses, Default::default()),
2379 mem::replace(&mut self.defs, Default::default()),
2380 );
2381
2382 let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap();
2383 for inst in M::gen_call(
2384 &self.dest,
2385 uses,
2386 defs,
2387 self.clobbers,
2388 self.opcode,
2389 tmp,
2390 ctx.sigs()[self.sig].call_conv,
2391 self.caller_conv,
2392 )
2393 .into_iter()
2394 {
2395 ctx.emit(inst);
2396 }
2397 }
2398}
2399
2400#[cfg(test)]
2401mod tests {
2402 use super::SigData;
2403
2404 #[test]
2405 fn sig_data_size() {
2406 // The size of `SigData` is performance sensitive, so make sure
2407 // we don't regress it unintentionally.
2408 assert_eq!(std::mem::size_of::<SigData>(), 24);
2409 }
2410}