cranelift_codegen/isa/x64/lower.rs

//! Lowering rules for X64.

// ISLE integration glue.
pub(super) mod isle;

use crate::ir::{types, ExternalName, Inst as IRInst, LibCall, Opcode, Type};
use crate::isa::x64::abi::*;
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::isa::{x64::X64Backend, CallConv};
use crate::machinst::abi::SmallInstVec;
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::result::CodegenResult;
use crate::settings::Flags;
use smallvec::smallvec;
use target_lexicon::Triple;

//=============================================================================
// Helpers for instruction lowering.

fn is_int_or_ref_ty(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
        types::R32 => panic!("shouldn't have 32-bit refs on x64"),
        _ => false,
    }
}

/// Returns the instruction that produces the given `input`, if that
/// instruction's opcode is `op`.
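///
/// For example, `matches_input(ctx, input, Opcode::Ishl)` returns the defining
/// `ishl` instruction when `input` comes directly from one.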
// TODO investigate failures with checking against the result index.
fn matches_input(ctx: &mut Lower<Inst>, input: InsnInput, op: Opcode) -> Option<IRInst> {
    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
    inputs.inst.as_inst().and_then(|(src_inst, _)| {
        let data = ctx.data(src_inst);
        if data.opcode() == op {
            return Some(src_inst);
        }
        None
    })
}

/// Put the given input into possibly multiple registers, and mark it as used (side-effect).
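///
/// Constant inputs are rematerialized at each use as an immediate move into a
/// fresh temporary, rather than being kept live in a register, to reduce
/// long-range register pressure.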
fn put_input_in_regs(ctx: &mut Lower<Inst>, spec: InsnInput) -> ValueRegs<Reg> {
    let ty = ctx.input_ty(spec.insn, spec.input);
    let input = ctx.get_input_as_source_or_const(spec.insn, spec.input);

    if let Some(c) = input.constant {
        // Generate constants fresh at each use to minimize long-range register pressure.
        let from_bits = ty_bits(ty);
        let (size, c) = if from_bits < 64 {
            (OperandSize::Size32, c & ((1u64 << from_bits) - 1))
        } else {
            (OperandSize::Size64, c)
        };
        assert!(is_int_or_ref_ty(ty)); // Only used for addresses.
        let cst_copy = ctx.alloc_tmp(ty);
        ctx.emit(Inst::imm(size, c, cst_copy.only_reg().unwrap()));
        non_writable_value_regs(cst_copy)
    } else {
        ctx.put_input_in_regs(spec.insn, spec.input)
    }
}

/// Put the given input into a register, and mark it as used (side-effect).
fn put_input_in_reg(ctx: &mut Lower<Inst>, spec: InsnInput) -> Reg {
    put_input_in_regs(ctx, spec)
        .only_reg()
        .expect("Multi-register value not expected")
}

enum MergeableLoadSize {
    /// The load size performed by a sinkable load merging operation is
    /// precisely the size necessary for the type in question.
    Exact,

    /// Narrower-than-32-bit values are handled by ALU insts that are at least
    /// 32 bits wide, which is normally OK as we ignore the upper bits; but if we
    /// generate, e.g., a direct-from-memory 32-bit add for a byte value and the
    /// byte is the last byte in a page, the wider access reads past the end of
    /// the value, potentially past the end of the page. So we only allow loads
    /// to merge for 32-bit-and-above widths.
    Min32,
}

/// Determines whether a load operation (indicated by `src_insn`) can be merged
/// into the current lowering point. If so, returns the address-base input (as
/// an `InsnInput`) and the constant offset from that base at which to perform
/// the load.
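///
/// For example (illustrative), this is what lets an `iadd` whose operand comes
/// from a load be lowered to a single ALU instruction with a memory operand,
/// rather than a separate load followed by an add; the merging itself is driven
/// by the ISLE lowering rules.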
fn is_mergeable_load(
    ctx: &mut Lower<Inst>,
    src_insn: IRInst,
    size: MergeableLoadSize,
) -> Option<(InsnInput, i32)> {
    let insn_data = ctx.data(src_insn);
    let inputs = ctx.num_inputs(src_insn);
    if inputs != 1 {
        return None;
    }

    // If this type is too small to get a merged load, don't merge the load.
    let load_ty = ctx.output_ty(src_insn, 0);
    if ty_bits(load_ty) < 32 {
        match size {
            MergeableLoadSize::Exact => {}
            MergeableLoadSize::Min32 => return None,
        }
    }

    // Just testing the opcode is enough, because the width will always match if
    // the type does (and the type should match if the CLIF is properly
    // constructed).
    if insn_data.opcode() == Opcode::Load {
        let offset = insn_data
            .load_store_offset()
            .expect("load should have offset");
        Some((
            InsnInput {
                insn: src_insn,
                input: 0,
            },
            offset,
        ))
    } else {
        None
    }
}

fn input_to_imm(ctx: &mut Lower<Inst>, spec: InsnInput) -> Option<u64> {
    ctx.get_input_as_source_or_const(spec.insn, spec.input)
        .constant
}

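/// Emits a call to the runtime library routine for the given `LibCall`: the
/// sequence below performs the stack pre-adjustment, moves the arguments into
/// place, emits the call itself, copies out the return values, and finally
/// performs the stack post-adjustment.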
fn emit_vm_call(
    ctx: &mut Lower<Inst>,
    flags: &Flags,
    triple: &Triple,
    libcall: LibCall,
    inputs: &[Reg],
    outputs: &[Writable<Reg>],
) -> CodegenResult<()> {
    let extname = ExternalName::LibCall(libcall);

    let dist = if flags.use_colocated_libcalls() {
        RelocDistance::Near
    } else {
        RelocDistance::Far
    };

    // TODO avoid recreating signatures for every single Libcall function.
    let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple));
    let sig = libcall.signature(call_conv);
    let caller_conv = ctx.abi().call_conv(ctx.sigs());

    if !ctx.sigs().have_abi_sig_for_signature(&sig) {
        ctx.sigs_mut()
            .make_abi_sig_from_ir_signature::<X64ABIMachineSpec>(sig.clone(), flags)?;
    }

    let mut abi =
        X64Caller::from_libcall(ctx.sigs(), &sig, &extname, dist, caller_conv, flags.clone())?;

    abi.emit_stack_pre_adjust(ctx);

    assert_eq!(inputs.len(), abi.num_args(ctx.sigs()));

    for (i, input) in inputs.iter().enumerate() {
        for inst in abi.gen_arg(ctx, i, ValueRegs::one(*input)) {
            ctx.emit(inst);
        }
    }

    let mut retval_insts: SmallInstVec<_> = smallvec![];
    for (i, output) in outputs.iter().enumerate() {
        retval_insts.extend(abi.gen_retval(ctx, i, ValueRegs::one(*output)).into_iter());
    }
    abi.emit_call(ctx);
    for inst in retval_insts {
        ctx.emit(inst);
    }
    abi.emit_stack_post_adjust(ctx);

    Ok(())
}

/// Returns the shifted operand and shift amount if the given input is a left
/// shift by a constant value less than or equal to 3. The goal is to embed it
/// within an address mode.
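///
/// Shift amounts 0 through 3 correspond to the x86 SIB scale factors 1, 2, 4
/// and 8, so e.g. an `ishl x, 3` feeding an address computation can be folded
/// into a `[base + x*8]` operand.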
fn matches_small_constant_shift(ctx: &mut Lower<Inst>, spec: InsnInput) -> Option<(InsnInput, u8)> {
    matches_input(ctx, spec, Opcode::Ishl).and_then(|shift| {
        match input_to_imm(
            ctx,
            InsnInput {
                insn: shift,
                input: 1,
            },
        ) {
            Some(shift_amt) if shift_amt <= 3 => Some((
                InsnInput {
                    insn: shift,
                    input: 0,
                },
                shift_amt as u8,
            )),
            _ => None,
        }
    })
}

/// Lowers an instruction to one of the x86 addressing modes.
///
/// Note: the 32-bit offset in Cranelift has to be sign-extended, which matches x86's behavior.
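///
/// The resulting `Amode` is either `[base + disp32]` or, when an `iadd` with a
/// small constant shift can be folded in, `[base + index*2^shift + disp32]`
/// (illustrative of the two forms constructed below).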
fn lower_to_amode(ctx: &mut Lower<Inst>, spec: InsnInput, offset: i32) -> Amode {
    let flags = ctx
        .memflags(spec.insn)
        .expect("Instruction with amode should have memflags");

    // At this point we either have an `iadd` whose operands we can fold into the
    // address mode, or some other input that simply becomes the base register; in
    // both cases the final offset is folded in as well.
    if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
        debug_assert_eq!(ctx.output_ty(add, 0), types::I64);
        let add_inputs = &[
            InsnInput {
                insn: add,
                input: 0,
            },
            InsnInput {
                insn: add,
                input: 1,
            },
        ];

        // TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations
        // aren't happening in the wasm case. We could do better, given some range analysis.
        let (base, index, shift) = if let Some((shift_input, shift_amt)) =
            matches_small_constant_shift(ctx, add_inputs[0])
        {
            (
                put_input_in_reg(ctx, add_inputs[1]),
                put_input_in_reg(ctx, shift_input),
                shift_amt,
            )
        } else if let Some((shift_input, shift_amt)) =
            matches_small_constant_shift(ctx, add_inputs[1])
        {
            (
                put_input_in_reg(ctx, add_inputs[0]),
                put_input_in_reg(ctx, shift_input),
                shift_amt,
            )
        } else {
            for i in 0..=1 {
                // Try to pierce through uextend.
                if let Some(uextend) = matches_input(
                    ctx,
                    InsnInput {
                        insn: add,
                        input: i,
                    },
                    Opcode::Uextend,
                ) {
                    if let Some(cst) = ctx.get_input_as_source_or_const(uextend, 0).constant {
                        // Zero the upper bits.
                        let input_size = ctx.input_ty(uextend, 0).bits() as u64;
                        let shift: u64 = 64 - input_size;
                        let uext_cst: u64 = (cst << shift) >> shift;

                        let final_offset = (offset as i64).wrapping_add(uext_cst as i64);
                        if low32_will_sign_extend_to_64(final_offset as u64) {
                            let base = put_input_in_reg(ctx, add_inputs[1 - i]);
                            return Amode::imm_reg(final_offset as u32, base).with_flags(flags);
                        }
                    }
                }

                // If it's a constant, add it directly!
                if let Some(cst) = ctx.get_input_as_source_or_const(add, i).constant {
                    let final_offset = (offset as i64).wrapping_add(cst as i64);
                    if low32_will_sign_extend_to_64(final_offset as u64) {
                        let base = put_input_in_reg(ctx, add_inputs[1 - i]);
                        return Amode::imm_reg(final_offset as u32, base).with_flags(flags);
                    }
                }
            }

            (
                put_input_in_reg(ctx, add_inputs[0]),
                put_input_in_reg(ctx, add_inputs[1]),
                0,
            )
        };

        return Amode::imm_reg_reg_shift(
            offset as u32,
            Gpr::new(base).unwrap(),
            Gpr::new(index).unwrap(),
            shift,
        )
        .with_flags(flags);
    }

    let input = put_input_in_reg(ctx, spec);
    Amode::imm_reg(offset as u32, input).with_flags(flags)
}

//=============================================================================
// Lowering-backend trait implementation.

impl LowerBackend for X64Backend {
    type MInst = Inst;

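    // Instruction and branch lowering are driven entirely by the ISLE-generated
    // rules; the methods below simply delegate to the `isle` glue module.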
    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> Option<InstOutput> {
        isle::lower(ctx, self, ir_inst)
    }

    fn lower_branch(
        &self,
        ctx: &mut Lower<Inst>,
        ir_inst: IRInst,
        targets: &[MachLabel],
    ) -> Option<()> {
        isle::lower_branch(ctx, self, ir_inst, targets)
    }

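    // The pinned register, when used, is a GPR reserved from allocation by the
    // backend (`regs::pinned_reg()`; %r15 on x64 at the time of writing).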
    fn maybe_pinned_reg(&self) -> Option<Reg> {
        Some(regs::pinned_reg())
    }
}