cranelift_codegen/isa/x64/lower/isle.rs

//! ISLE integration glue code for x64 lowering.

// Pull in the ISLE generated code.
pub(crate) mod generated_code;
use crate::{
    ir::types,
    ir::AtomicRmwOp,
    machinst::{InputSourceInst, Reg, Writable},
};
use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
use generated_code::{Context, MInst, RegisterClass};

// Types that the generated ISLE code uses via `use super::*`.
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, MergeableLoadSize};
use crate::ir::LibCall;
use crate::isa::x64::lower::emit_vm_call;
use crate::isa::x64::X64Backend;
use crate::{
    ir::{
        condcodes::{CondCode, FloatCC, IntCC},
        immediates::*,
        types::*,
        BlockCall, Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueList,
    },
    isa::{
        unwind::UnwindInst,
        x64::{
            abi::X64Caller,
            inst::{args::*, regs, CallInfo},
        },
    },
    machinst::{
        isle::*, valueregs, ArgPair, InsnInput, InstOutput, Lower, MachAtomicRmwOp, MachInst,
        VCodeConstant, VCodeConstantData,
    },
};
use alloc::vec::Vec;
use regalloc2::PReg;
use smallvec::SmallVec;
use std::boxed::Box;
use std::convert::TryFrom;

type BoxCallInfo = Box<CallInfo>;
type BoxVecMachLabel = Box<SmallVec<[MachLabel; 4]>>;
type MachLabelSlice = [MachLabel];
type VecArgPair = Vec<ArgPair>;

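/// A load instruction that can be sunk into (merged with) the instruction that
/// uses its result, becoming a memory operand of that instruction rather than
/// producing a value in a register.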
pub struct SinkableLoad {
    inst: Inst,
    addr_input: InsnInput,
    offset: i32,
}

/// The main entry point for lowering with ISLE.
pub(crate) fn lower(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    inst: Inst,
) -> Option<InstOutput> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower(&mut isle_ctx, inst)
}

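/// The entry point for lowering branch instructions with ISLE: `targets` gives
/// the `MachLabel` destination for each successor of `branch`.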
pub(crate) fn lower_branch(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    branch: Inst,
    targets: &[MachLabel],
) -> Option<()> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec())
}

impl Context for IsleContext<'_, '_, MInst, X64Backend> {
    isle_lower_prelude_methods!();
    isle_prelude_caller_methods!(X64ABIMachineSpec, X64Caller);

    #[inline]
    fn operand_size_of_type_32_64(&mut self, ty: Type) -> OperandSize {
        if ty.bits() == 64 {
            OperandSize::Size64
        } else {
            OperandSize::Size32
        }
    }

    #[inline]
    fn raw_operand_size_of_type(&mut self, ty: Type) -> OperandSize {
        OperandSize::from_ty(ty)
    }

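    // Lower `val` into the widest operand form available: a 32-bit immediate
    // when it is a small constant, otherwise whatever `put_in_reg_mem` picks
    // (a constant-pool or sinkable-load memory operand, or a register).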
    fn put_in_reg_mem_imm(&mut self, val: Value) -> RegMemImm {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            if let Some(imm) = to_simm32(c as i64) {
                return imm.to_reg_mem_imm();
            }
        }

        self.put_in_reg_mem(val).into()
    }

    fn put_in_xmm_mem_imm(&mut self, val: Value) -> XmmMemImm {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            if let Some(imm) = to_simm32(c as i64) {
                return XmmMemImm::new(imm.to_reg_mem_imm()).unwrap();
            }
        }

        let res = match self.put_in_xmm_mem(val).to_reg_mem() {
            RegMem::Reg { reg } => RegMemImm::Reg { reg },
            RegMem::Mem { addr } => RegMemImm::Mem { addr },
        };

        XmmMemImm::new(res).unwrap()
    }

    fn put_in_xmm_mem(&mut self, val: Value) -> XmmMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // A load from the constant pool is better than a rematerialization into a register,
            // because it reduces register pressure.
            //
            // NOTE: this is where behavior differs from `put_in_reg_mem`, as we always force
            // constants to be 16 bytes when a constant will be used in place of an xmm register.
            let vcode_constant = self.emit_u128_le_const(c as u128);
            return XmmMem::new(RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant)))
                .unwrap();
        }

        XmmMem::new(self.put_in_reg_mem(val)).unwrap()
    }

    fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // A load from the constant pool is better than a
            // rematerialization into a register, because it reduces
            // register pressure.
            let vcode_constant = self.emit_u64_le_const(c);
            return RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant));
        }

        if let Some(load) = self.sinkable_load(val) {
            return RegMem::Mem {
                addr: self.sink_load(&load),
            };
        }

        RegMem::reg(self.put_in_reg(val))
    }

    #[inline]
    fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn encode_round_imm(&mut self, imm: &RoundImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn use_avx_simd(&mut self) -> bool {
        self.backend.x64_flags.use_avx_simd()
    }

    #[inline]
    fn use_avx2_simd(&mut self) -> bool {
        self.backend.x64_flags.use_avx2_simd()
    }

    #[inline]
    fn avx512vl_enabled(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_avx512vl_simd()
    }

    #[inline]
    fn avx512dq_enabled(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_avx512dq_simd()
    }

    #[inline]
    fn avx512f_enabled(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_avx512f_simd()
    }

    #[inline]
    fn avx512bitalg_enabled(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_avx512bitalg_simd()
    }

    #[inline]
    fn avx512vbmi_enabled(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_avx512vbmi_simd()
    }

    #[inline]
    fn use_lzcnt(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_lzcnt()
    }

    #[inline]
    fn use_bmi1(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_bmi1()
    }

    #[inline]
    fn use_popcnt(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_popcnt()
    }

    #[inline]
    fn use_fma(&mut self) -> bool {
        self.backend.x64_flags.use_fma()
    }

    #[inline]
    fn use_sse41(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_sse41()
    }

    #[inline]
    fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
        let inst = self.lower_ctx.dfg().value_def(val).inst()?;
        let constant = self.lower_ctx.get_constant(inst)?;
        let imm = u8::try_from(constant).ok()?;
        Some(Imm8Reg::Imm8 { imm })
    }

    #[inline]
    fn const_to_type_masked_imm8(&mut self, c: u64, ty: Type) -> Imm8Gpr {
        let mask = self.shift_mask(ty) as u64;
        Imm8Gpr::new(Imm8Reg::Imm8 {
            imm: (c & mask) as u8,
        })
        .unwrap()
    }

    #[inline]
    fn shift_mask(&mut self, ty: Type) -> u32 {
        debug_assert!(ty.lane_bits().is_power_of_two());

        ty.lane_bits() - 1
    }

    fn shift_amount_masked(&mut self, ty: Type, val: Imm64) -> u32 {
        (val.bits() as u32) & self.shift_mask(ty)
    }

    #[inline]
    fn simm32_from_value(&mut self, val: Value) -> Option<GprMemImm> {
        let inst = self.lower_ctx.dfg().value_def(val).inst()?;
        let constant: u64 = self.lower_ctx.get_constant(inst)?;
        let constant = constant as i64;
        to_simm32(constant)
    }

    #[inline]
    fn simm32_from_imm64(&mut self, imm: Imm64) -> Option<GprMemImm> {
        to_simm32(imm.bits())
    }

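    // A load can only be sunk when this use is the unique use of its result;
    // otherwise the load would still have to be emitted for the other users,
    // or duplicated, which could change trap and memory-ordering behavior.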
    fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
        let input = self.lower_ctx.get_value_as_source_or_const(val);
        if let InputSourceInst::UniqueUse(inst, 0) = input.inst {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
        let input = self.lower_ctx.get_value_as_source_or_const(val);
        if let InputSourceInst::UniqueUse(inst, 0) = input.inst {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sink_load(&mut self, load: &SinkableLoad) -> SyntheticAmode {
        self.lower_ctx.sink_inst(load.inst);
        let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
        SyntheticAmode::Real(addr)
    }

    #[inline]
    fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
        ExtMode::new(from_bits, to_bits).unwrap()
    }

    fn emit(&mut self, inst: &MInst) -> Unit {
        self.lower_ctx.emit(inst.clone());
    }

    #[inline]
    fn nonzero_u64_fits_in_u32(&mut self, x: u64) -> Option<u64> {
        if x != 0 && x < u64::from(u32::MAX) {
            Some(x)
        } else {
            None
        }
    }

    #[inline]
    fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 {
        // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane
        // shifted into bits 5:6).
        0b00_00_00_00 | lane << 4
    }

    #[inline]
    fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
        RegMem::mem(addr.clone())
    }

    #[inline]
    fn amode_to_synthetic_amode(&mut self, amode: &Amode) -> SyntheticAmode {
        amode.clone().into()
    }

    #[inline]
    fn const_to_synthetic_amode(&mut self, c: VCodeConstant) -> SyntheticAmode {
        SyntheticAmode::ConstantOffset(c)
    }

    #[inline]
    fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
        r.to_writable_reg()
    }

    #[inline]
    fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
        r.to_writable_reg()
    }

    fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        // When the shift amount is known, we can statically (i.e. at compile
        // time) determine the mask to use and only emit that.
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_ISHL_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_ISHL_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    fn ushr_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        // When the shift amount is known, we can statically (i.e. at compile
        // time) determine the mask to use and only emit that.
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_USHR_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    fn popcount_4bit_table(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&POPCOUNT_4BIT_TABLE))
    }

    fn popcount_low_mask(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&POPCOUNT_LOW_MASK))
    }

    #[inline]
    fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
        Writable::from_reg(Xmm::new(r.to_reg()).unwrap())
    }

    #[inline]
    fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
        r.to_reg()
    }

    #[inline]
    fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
        r.to_reg()
    }

    #[inline]
    fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_xmm_mem_imm(&mut self, r: &XmmMem) -> XmmMemImm {
        XmmMemImm::new(r.clone().to_reg_mem().into()).unwrap()
    }

    #[inline]
    fn temp_writable_gpr(&mut self) -> WritableGpr {
        Writable::from_reg(Gpr::new(self.temp_writable_reg(I64).to_reg()).unwrap())
    }

    #[inline]
    fn temp_writable_xmm(&mut self) -> WritableXmm {
        Writable::from_reg(Xmm::new(self.temp_writable_reg(I8X16).to_reg()).unwrap())
    }

    #[inline]
    fn reg_to_reg_mem_imm(&mut self, reg: Reg) -> RegMemImm {
        RegMemImm::Reg { reg }
    }

    #[inline]
    fn reg_mem_to_xmm_mem(&mut self, rm: &RegMem) -> XmmMem {
        XmmMem::new(rm.clone()).unwrap()
    }

    #[inline]
    fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
        GprMemImm::new(rmi.clone()).unwrap()
    }

    #[inline]
    fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
        XmmMemImm::new(rmi.clone()).unwrap()
    }

    #[inline]
    fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
        xm.clone().into()
    }

    #[inline]
    fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
        gm.clone().into()
    }

    #[inline]
    fn xmm_new(&mut self, r: Reg) -> Xmm {
        Xmm::new(r).unwrap()
    }

    #[inline]
    fn gpr_new(&mut self, r: Reg) -> Gpr {
        Gpr::new(r).unwrap()
    }

    #[inline]
    fn reg_mem_to_gpr_mem(&mut self, rm: &RegMem) -> GprMem {
        GprMem::new(rm.clone()).unwrap()
    }

    #[inline]
    fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
        GprMem::new(RegMem::reg(r)).unwrap()
    }

    #[inline]
    fn imm8_reg_to_imm8_gpr(&mut self, ir: &Imm8Reg) -> Imm8Gpr {
        Imm8Gpr::new(ir.clone()).unwrap()
    }

    #[inline]
    fn gpr_to_gpr_mem(&mut self, gpr: Gpr) -> GprMem {
        GprMem::from(gpr)
    }

    #[inline]
    fn gpr_to_gpr_mem_imm(&mut self, gpr: Gpr) -> GprMemImm {
        GprMemImm::from(gpr)
    }

    #[inline]
    fn gpr_to_imm8_gpr(&mut self, gpr: Gpr) -> Imm8Gpr {
        Imm8Gpr::from(gpr)
    }

    #[inline]
    fn imm8_to_imm8_gpr(&mut self, imm: u8) -> Imm8Gpr {
        Imm8Gpr::new(Imm8Reg::Imm8 { imm }).unwrap()
    }

    #[inline]
    fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
        if is_int_or_ref_ty(ty) || ty == I128 {
            Some(RegisterClass::Gpr {
                single_register: ty != I128,
            })
        } else if ty == F32 || ty == F64 || (ty.is_vector() && ty.bits() == 128) {
            Some(RegisterClass::Xmm)
        } else {
            None
        }
    }

    #[inline]
    fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
        match ty {
            types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => Some(()),
            types::R32 => panic!("shouldn't have 32-bits refs on x64"),
            _ => None,
        }
    }

    #[inline]
    fn intcc_without_eq(&mut self, x: &IntCC) -> IntCC {
        x.without_equal()
    }

    #[inline]
    fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
        CC::from_intcc(*intcc)
    }

    #[inline]
    fn cc_invert(&mut self, cc: &CC) -> CC {
        cc.invert()
    }

    #[inline]
    fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> {
        match cc {
            CC::Z => Some(*cc),
            CC::NZ => Some(*cc),
            _ => None,
        }
    }

    #[inline]
    fn sum_extend_fits_in_32_bits(
        &mut self,
        extend_from_ty: Type,
        constant_value: Imm64,
        offset: Offset32,
    ) -> Option<u32> {
        let offset: i64 = offset.into();
        let constant_value: u64 = constant_value.bits() as u64;
        // If necessary, zero extend `constant_value` up to 64 bits.
        let shift = 64 - extend_from_ty.bits();
        let zero_extended_constant_value = (constant_value << shift) >> shift;
        // Sum up the two operands.
        let sum = offset.wrapping_add(zero_extended_constant_value as i64);
        // Check that the sum will fit in 32-bits.
        if sum == ((sum << 32) >> 32) {
            Some(sum as u32)
        } else {
            None
        }
    }

    #[inline]
    fn amode_offset(&mut self, addr: &Amode, offset: u32) -> Amode {
        addr.offset(offset)
    }

    #[inline]
    fn zero_offset(&mut self) -> Offset32 {
        Offset32::new(0)
    }

    #[inline]
    fn atomic_rmw_op_to_mach_atomic_rmw_op(&mut self, op: &AtomicRmwOp) -> MachAtomicRmwOp {
        MachAtomicRmwOp::from(*op)
    }

    #[inline]
    fn preg_rbp(&mut self) -> PReg {
        regs::rbp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_rsp(&mut self) -> PReg {
        regs::rsp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_pinned(&mut self) -> PReg {
        regs::pinned_reg().to_real_reg().unwrap().into()
    }

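    // Emit a call to a runtime `LibCall` with the given argument registers and
    // return its single result in a freshly allocated temporary register.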
    fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg {
        let call_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs());
        let ret_ty = libcall.signature(call_conv).returns[0].value_type;
        let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap();

        emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            libcall.clone(),
            &[a],
            &[output_reg],
        )
        .expect("Failed to emit LibCall");

        output_reg.to_reg()
    }

    fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg {
        let call_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs());
        let ret_ty = libcall.signature(call_conv).returns[0].value_type;
        let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap();

        emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            libcall.clone(),
            &[a, b, c],
            &[output_reg],
        )
        .expect("Failed to emit LibCall");

        output_reg.to_reg()
    }

    #[inline]
    fn single_target(&mut self, targets: &MachLabelSlice) -> Option<MachLabel> {
        if targets.len() == 1 {
            Some(targets[0])
        } else {
            None
        }
    }

    #[inline]
    fn two_targets(&mut self, targets: &MachLabelSlice) -> Option<(MachLabel, MachLabel)> {
        if targets.len() == 2 {
            Some((targets[0], targets[1]))
        } else {
            None
        }
    }

    #[inline]
    fn jump_table_targets(
        &mut self,
        targets: &MachLabelSlice,
    ) -> Option<(MachLabel, BoxVecMachLabel)> {
        if targets.is_empty() {
            return None;
        }

        let default_label = targets[0];
        let jt_targets = Box::new(SmallVec::from(&targets[1..]));
        Some((default_label, jt_targets))
    }

    #[inline]
    fn jump_table_size(&mut self, targets: &BoxVecMachLabel) -> u32 {
        targets.len() as u32
    }

    #[inline]
    fn vconst_all_ones_or_all_zeros(&mut self, constant: Constant) -> Option<()> {
        let const_data = self.lower_ctx.get_constant_data(constant);
        if const_data.iter().all(|&b| b == 0 || b == 0xFF) {
            return Some(());
        }
        None
    }

    #[inline]
    fn fcvt_uint_mask_const(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&UINT_MASK))
    }

    #[inline]
    fn fcvt_uint_mask_high_const(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH))
    }

    #[inline]
    fn iadd_pairwise_mul_const_16(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_MUL_CONST_16))
    }

    #[inline]
    fn iadd_pairwise_mul_const_32(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_MUL_CONST_32))
    }

    #[inline]
    fn iadd_pairwise_xor_const_32(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_XOR_CONST_32))
    }

    #[inline]
    fn iadd_pairwise_addd_const_32(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_ADDD_CONST_32))
    }

    #[inline]
    fn snarrow_umax_mask(&mut self) -> VCodeConstant {
        // 2147483647.0 is equivalent to 0x41DFFFFFFFC00000
        static UMAX_MASK: [u8; 16] = [
            0x00, 0x00, 0xC0, 0xFF, 0xFF, 0xFF, 0xDF, 0x41, 0x00, 0x00, 0xC0, 0xFF, 0xFF, 0xFF,
            0xDF, 0x41,
        ];
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&UMAX_MASK))
    }

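    // The `shuffle_*_mask` helpers below build byte-selection masks for
    // `pshufb`-style shuffles: any lane index that cannot be selected from the
    // chosen operand is rewritten to 0x80, which the instruction interprets as
    // "write zero" because the high bit is set.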
    #[inline]
    fn shuffle_0_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_0_15_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_16_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| b.wrapping_sub(16))
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn perm_from_mask_with_zeros(
        &mut self,
        mask: &VecMask,
    ) -> Option<(VCodeConstant, VCodeConstant)> {
        if !mask.iter().any(|&b| b > 31) {
            return None;
        }

        let zeros = mask
            .iter()
            .map(|&b| if b > 31 { 0x00 } else { 0xff })
            .collect();

        Some((
            self.perm_from_mask(mask),
            self.lower_ctx
                .use_constant(VCodeConstantData::Generated(zeros)),
        ))
    }

    #[inline]
    fn perm_from_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask.iter().cloned().collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn swizzle_zero_mask(&mut self) -> VCodeConstant {
        static ZERO_MASK_VALUE: [u8; 16] = [0x70; 16];
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&ZERO_MASK_VALUE))
    }

    #[inline]
    fn sqmul_round_sat_mask(&mut self) -> VCodeConstant {
        static SAT_MASK: [u8; 16] = [
            0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
            0x00, 0x80,
        ];
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&SAT_MASK))
    }

    #[inline]
    fn uunarrow_umax_mask(&mut self) -> VCodeConstant {
        // 4294967295.0 is equivalent to 0x41EFFFFFFFE00000
        static UMAX_MASK: [u8; 16] = [
            0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, 0xEF, 0x41, 0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF,
            0xEF, 0x41,
        ];

        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&UMAX_MASK))
    }

    #[inline]
    fn uunarrow_uint_mask(&mut self) -> VCodeConstant {
        static UINT_MASK: [u8; 16] = [
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x30, 0x43,
        ];

        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&UINT_MASK))
    }

    fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
        match XmmMemAligned::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMem::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

    fn xmm_mem_imm_to_xmm_mem_aligned_imm(&mut self, arg: &XmmMemImm) -> XmmMemAlignedImm {
        match XmmMemAlignedImm::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMemImm::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

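    // A 32x4 shuffle whose four selectors all come from the left operand can
    // be encoded as a single `pshufd`; pack the selectors two bits each into
    // the instruction's immediate byte.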
    fn pshufd_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufd_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        // When selecting from the right-hand side, subtract 4 from each
        // selector; the checked subtraction bails out if any selector is less
        // than 4 (i.e. refers to the left operand). Afterwards the check is
        // the same as `pshufd_lhs_imm` above.
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_imm(&mut self, imm: Immediate) -> Option<u8> {
        // The `shufps` instruction selects the first two elements from the
        // first vector and the second two elements from the second vector, so
        // offset the third/fourth selectors by 4 and then make sure everything
        // fits in 32-bits.
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_rev_imm(&mut self, imm: Immediate) -> Option<u8> {
        // This is almost the same as `shufps_imm` except that the subtraction
        // is applied to the first two selectors instead of the last two. This
        // handles the case where a `shufps` instruction can be emitted once
        // the order of the operands is swapped.
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Similar to `shufps` except this operates over 16-bit values so four
        // of them must be fixed and the other four must be in-range to encode
        // in the immediate.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(8)?;
        let f = f.checked_sub(8)?;
        let g = g.checked_sub(8)?;
        let h = h.checked_sub(8)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufhw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Similar to `pshuflw` except that the first four operands must be
        // fixed and the second four are offset by an extra 4 and tested to
        // make sure they're all in the range [4, 8).
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let e = e.checked_sub(4)?;
        let f = f.checked_sub(4)?;
        let g = g.checked_sub(4)?;
        let h = h.checked_sub(4)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn pshufhw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Note that every selector here is offset by at least 8 and the upper
        // four lanes are offset by 12 to check that they lie in the range
        // [12, 16).
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(12)?;
        let f = f.checked_sub(12)?;
        let g = g.checked_sub(12)?;
        let h = h.checked_sub(12)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn palignr_imm_from_immediate(&mut self, imm: Immediate) -> Option<u8> {
        let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();

        if bytes.windows(2).all(|a| a[0] + 1 == a[1]) {
            Some(bytes[0])
        } else {
            None
        }
    }

    fn pblendw_imm(&mut self, imm: Immediate) -> Option<u8> {
        // First make sure that the shuffle immediate is selecting 16-bit lanes.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;

        // Next build up an 8-bit mask from each of the bits of the selected
        // lanes above. This instruction can only be used when each lane
        // selector chooses from the corresponding lane in either of the two
        // operands, meaning the Nth lane selection must satisfy `lane % 8 ==
        // N`.
        //
        // This helper closure is used to calculate the value of the
        // corresponding bit.
        let bit = |x: u8, c: u8| {
            if x % 8 == c {
                if x < 8 {
                    Some(0)
                } else {
                    Some(1 << c)
                }
            } else {
                None
            }
        };
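        // For example, the 16x8 shuffle [0, 9, 2, 11, 4, 13, 6, 15] keeps the
        // even lanes from the first operand and the odd lanes from the second,
        // yielding the immediate 0b1010_1010.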
        Some(
            bit(a, 0)?
                | bit(b, 1)?
                | bit(c, 2)?
                | bit(d, 3)?
                | bit(e, 4)?
                | bit(f, 5)?
                | bit(g, 6)?
                | bit(h, 7)?,
        )
    }

    fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
        XmmMemImm::new(RegMemImm::imm(imm)).unwrap()
    }
}

impl IsleContext<'_, '_, MInst, X64Backend> {
    isle_prelude_method_helpers!(X64Caller);

    fn load_xmm_unaligned(&mut self, addr: SyntheticAmode) -> Xmm {
        let tmp = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        self.lower_ctx.emit(MInst::XmmUnaryRmRUnaligned {
            op: SseOpcode::Movdqu,
            src: XmmMem::new(RegMem::mem(addr)).unwrap(),
            dst: Writable::from_reg(Xmm::new(tmp.to_reg()).unwrap()),
        });
        Xmm::new(tmp.to_reg()).unwrap()
    }
}

// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
// need to fix up the bits that migrate from one half of the lane to the
// other. Each 16-byte mask is indexed by the shift amount: e.g. if we shift
// right by 0 (no movement), we want to retain all the bits so we mask with
// `0xff`; if we shift right by 1, we want to retain all bits except the MSB so
// we mask with `0x7f`; etc.
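//
// For example, an `ishl.i8x16` by 3 is lowered as a 16x8 shift left by 3
// followed by an AND with the mask at offset 3 * 16 below (all bytes `0xf8`),
// which clears the three low bits that leaked in from each lane's neighbor.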

#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
const I8X16_ISHL_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
    0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
    0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
    0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
    0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
];

#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
const I8X16_USHR_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
];

/// Number of bits set in a given nibble (4-bit value). Used in the
/// vector implementation of popcount.
#[rustfmt::skip] // Preserve 4x4 layout.
const POPCOUNT_4BIT_TABLE: [u8; 16] = [
    0x00, 0x01, 0x01, 0x02,
    0x01, 0x02, 0x02, 0x03,
    0x01, 0x02, 0x02, 0x03,
    0x02, 0x03, 0x03, 0x04,
];

const POPCOUNT_LOW_MASK: [u8; 16] = [0x0f; 16];

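/// Check whether `constant` is representable as a sign-extended 32-bit
/// immediate: shifting left then right by 32 sign-extends the low 32 bits, so
/// the comparison holds exactly when the value round-trips (e.g. -1 and
/// 0x7fff_ffff qualify, but 0x8000_0000 does not).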
#[inline]
fn to_simm32(constant: i64) -> Option<GprMemImm> {
    if constant == ((constant << 32) >> 32) {
        Some(
            GprMemImm::new(RegMemImm::Imm {
                simm32: constant as u32,
            })
            .unwrap(),
        )
    } else {
        None
    }
}

const UINT_MASK: [u8; 16] = [
    0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];

const UINT_MASK_HIGH: [u8; 16] = [
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43,
];

const IADD_PAIRWISE_MUL_CONST_16: [u8; 16] = [0x01; 16];

const IADD_PAIRWISE_MUL_CONST_32: [u8; 16] = [
    0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
];

const IADD_PAIRWISE_XOR_CONST_32: [u8; 16] = [
    0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
];

const IADD_PAIRWISE_ADDD_CONST_32: [u8; 16] = [
    0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
];