pub(crate) mod generated_code;

use crate::{
    ir::types,
    ir::AtomicRmwOp,
    machinst::{InputSourceInst, Reg, Writable},
};
use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
use generated_code::{Context, MInst, RegisterClass};

use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, MergeableLoadSize};
use crate::ir::LibCall;
use crate::isa::x64::lower::emit_vm_call;
use crate::isa::x64::X64Backend;
use crate::{
    ir::{
        condcodes::{CondCode, FloatCC, IntCC},
        immediates::*,
        types::*,
        BlockCall, Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueList,
    },
    isa::{
        unwind::UnwindInst,
        x64::{
            abi::X64Caller,
            inst::{args::*, regs, CallInfo},
        },
    },
    machinst::{
        isle::*, valueregs, ArgPair, InsnInput, InstOutput, Lower, MachAtomicRmwOp, MachInst,
        VCodeConstant, VCodeConstantData,
    },
};
use alloc::vec::Vec;
use regalloc2::PReg;
use smallvec::SmallVec;
use std::boxed::Box;
use std::convert::TryFrom;

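// Type aliases for ISLE-declared types; the generated code refers to these
// names directly.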
type BoxCallInfo = Box<CallInfo>;
type BoxVecMachLabel = Box<SmallVec<[MachLabel; 4]>>;
type MachLabelSlice = [MachLabel];
type VecArgPair = Vec<ArgPair>;

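/// A load instruction with a single use that may be sunk into (merged with)
/// the instruction that uses it.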
pub struct SinkableLoad {
    inst: Inst,
    addr_input: InsnInput,
    offset: i32,
}

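/// The main entry point for lowering with ISLE.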
pub(crate) fn lower(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    inst: Inst,
) -> Option<InstOutput> {
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower(&mut isle_ctx, inst)
}

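/// The entry point for lowering branch instructions with ISLE, given the
/// branch's resolved targets.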
pub(crate) fn lower_branch(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    branch: Inst,
    targets: &[MachLabel],
) -> Option<()> {
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec())
}

impl Context for IsleContext<'_, '_, MInst, X64Backend> {
    isle_lower_prelude_methods!();
    isle_prelude_caller_methods!(X64ABIMachineSpec, X64Caller);

    #[inline]
    fn operand_size_of_type_32_64(&mut self, ty: Type) -> OperandSize {
        if ty.bits() == 64 {
            OperandSize::Size64
        } else {
            OperandSize::Size32
        }
    }

    #[inline]
    fn raw_operand_size_of_type(&mut self, ty: Type) -> OperandSize {
        OperandSize::from_ty(ty)
    }

    fn put_in_reg_mem_imm(&mut self, val: Value) -> RegMemImm {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            if let Some(imm) = to_simm32(c as i64) {
                return imm.to_reg_mem_imm();
            }
        }

        self.put_in_reg_mem(val).into()
    }

    fn put_in_xmm_mem_imm(&mut self, val: Value) -> XmmMemImm {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            if let Some(imm) = to_simm32(c as i64) {
                return XmmMemImm::new(imm.to_reg_mem_imm()).unwrap();
            }
        }

        let res = match self.put_in_xmm_mem(val).to_reg_mem() {
            RegMem::Reg { reg } => RegMemImm::Reg { reg },
            RegMem::Mem { addr } => RegMemImm::Mem { addr },
        };

        XmmMemImm::new(res).unwrap()
    }

    fn put_in_xmm_mem(&mut self, val: Value) -> XmmMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // Materialize the constant as a 128-bit constant-pool entry and
            // use it directly as a memory operand, rather than loading it
            // into a register first.
            let vcode_constant = self.emit_u128_le_const(c as u128);
            return XmmMem::new(RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant)))
                .unwrap();
        }

        XmmMem::new(self.put_in_reg_mem(val)).unwrap()
    }

    fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            let vcode_constant = self.emit_u64_le_const(c);
            return RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant));
        }

        if let Some(load) = self.sinkable_load(val) {
            return RegMem::Mem {
                addr: self.sink_load(&load),
            };
        }

        RegMem::reg(self.put_in_reg(val))
    }

    #[inline]
    fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn encode_round_imm(&mut self, imm: &RoundImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn use_avx_simd(&mut self) -> bool {
        self.backend.x64_flags.use_avx_simd()
    }

    #[inline]
    fn use_avx2_simd(&mut self) -> bool {
        self.backend.x64_flags.use_avx2_simd()
    }

    #[inline]
    fn avx512vl_enabled(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_avx512vl_simd()
    }

    #[inline]
    fn avx512dq_enabled(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_avx512dq_simd()
    }

    #[inline]
    fn avx512f_enabled(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_avx512f_simd()
    }

    #[inline]
    fn avx512bitalg_enabled(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_avx512bitalg_simd()
    }

    #[inline]
    fn avx512vbmi_enabled(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_avx512vbmi_simd()
    }

    #[inline]
    fn use_lzcnt(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_lzcnt()
    }

    #[inline]
    fn use_bmi1(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_bmi1()
    }

    #[inline]
    fn use_popcnt(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_popcnt()
    }

    #[inline]
    fn use_fma(&mut self) -> bool {
        self.backend.x64_flags.use_fma()
    }

    #[inline]
    fn use_sse41(&mut self, _: Type) -> bool {
        self.backend.x64_flags.use_sse41()
    }

    #[inline]
    fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
        let inst = self.lower_ctx.dfg().value_def(val).inst()?;
        let constant = self.lower_ctx.get_constant(inst)?;
        let imm = u8::try_from(constant).ok()?;
        Some(Imm8Reg::Imm8 { imm })
    }

    #[inline]
    fn const_to_type_masked_imm8(&mut self, c: u64, ty: Type) -> Imm8Gpr {
        let mask = self.shift_mask(ty) as u64;
        Imm8Gpr::new(Imm8Reg::Imm8 {
            imm: (c & mask) as u8,
        })
        .unwrap()
    }

    #[inline]
    fn shift_mask(&mut self, ty: Type) -> u32 {
        debug_assert!(ty.lane_bits().is_power_of_two());

        ty.lane_bits() - 1
    }

    fn shift_amount_masked(&mut self, ty: Type, val: Imm64) -> u32 {
        (val.bits() as u32) & self.shift_mask(ty)
    }

    #[inline]
    fn simm32_from_value(&mut self, val: Value) -> Option<GprMemImm> {
        let inst = self.lower_ctx.dfg().value_def(val).inst()?;
        let constant: u64 = self.lower_ctx.get_constant(inst)?;
        let constant = constant as i64;
        to_simm32(constant)
    }

    #[inline]
    fn simm32_from_imm64(&mut self, imm: Imm64) -> Option<GprMemImm> {
        to_simm32(imm.bits())
    }

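    /// Matches a load whose loaded value has a unique use and whose width is
    /// at least 32 bits, so the load can be sunk into its user.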
    fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
        let input = self.lower_ctx.get_value_as_source_or_const(val);
        if let InputSourceInst::UniqueUse(inst, 0) = input.inst {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
        let input = self.lower_ctx.get_value_as_source_or_const(val);
        if let InputSourceInst::UniqueUse(inst, 0) = input.inst {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

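    /// Sinks a previously matched load: the load instruction is marked as
    /// merged and its address is lowered into an amode at the use site.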
    fn sink_load(&mut self, load: &SinkableLoad) -> SyntheticAmode {
        self.lower_ctx.sink_inst(load.inst);
        let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
        SyntheticAmode::Real(addr)
    }

    #[inline]
    fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
        ExtMode::new(from_bits, to_bits).unwrap()
    }

    fn emit(&mut self, inst: &MInst) -> Unit {
        self.lower_ctx.emit(inst.clone());
    }

    #[inline]
    fn nonzero_u64_fits_in_u32(&mut self, x: u64) -> Option<u64> {
        if x != 0 && x < u64::from(u32::MAX) {
            Some(x)
        } else {
            None
        }
    }

    #[inline]
    fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 {
        // The `insertps` immediate: bits 7:6 select the source lane (always
        // lane 0 here), bits 5:4 select the destination lane, and bits 3:0
        // are a zero mask (no lanes zeroed).
        0b00_00_00_00 | lane << 4
    }

    #[inline]
    fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
        RegMem::mem(addr.clone())
    }

    #[inline]
    fn amode_to_synthetic_amode(&mut self, amode: &Amode) -> SyntheticAmode {
        amode.clone().into()
    }

    #[inline]
    fn const_to_synthetic_amode(&mut self, c: VCodeConstant) -> SyntheticAmode {
        SyntheticAmode::ConstantOffset(c)
    }

    #[inline]
    fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
        r.to_writable_reg()
    }

    #[inline]
    fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
        r.to_writable_reg()
    }

    fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        debug_assert!(amt < 8);
        // The mask table stores one 16-byte entry per shift amount, so the
        // entry for `amt` is at byte offset `amt * 16`.
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_ISHL_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_ISHL_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    fn ushr_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_USHR_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    fn popcount_4bit_table(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&POPCOUNT_4BIT_TABLE))
    }

    fn popcount_low_mask(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&POPCOUNT_LOW_MASK))
    }

    #[inline]
    fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
        Writable::from_reg(Xmm::new(r.to_reg()).unwrap())
    }

    #[inline]
    fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
        r.to_reg()
    }

    #[inline]
    fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
        r.to_reg()
    }

    #[inline]
    fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_xmm_mem_imm(&mut self, r: &XmmMem) -> XmmMemImm {
        XmmMemImm::new(r.clone().to_reg_mem().into()).unwrap()
    }

    #[inline]
    fn temp_writable_gpr(&mut self) -> WritableGpr {
        Writable::from_reg(Gpr::new(self.temp_writable_reg(I64).to_reg()).unwrap())
    }

    #[inline]
    fn temp_writable_xmm(&mut self) -> WritableXmm {
        Writable::from_reg(Xmm::new(self.temp_writable_reg(I8X16).to_reg()).unwrap())
    }

    #[inline]
    fn reg_to_reg_mem_imm(&mut self, reg: Reg) -> RegMemImm {
        RegMemImm::Reg { reg }
    }

    #[inline]
    fn reg_mem_to_xmm_mem(&mut self, rm: &RegMem) -> XmmMem {
        XmmMem::new(rm.clone()).unwrap()
    }

    #[inline]
    fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
        GprMemImm::new(rmi.clone()).unwrap()
    }

    #[inline]
    fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
        XmmMemImm::new(rmi.clone()).unwrap()
    }

    #[inline]
    fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
        xm.clone().into()
    }

    #[inline]
    fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
        gm.clone().into()
    }

    #[inline]
    fn xmm_new(&mut self, r: Reg) -> Xmm {
        Xmm::new(r).unwrap()
    }

    #[inline]
    fn gpr_new(&mut self, r: Reg) -> Gpr {
        Gpr::new(r).unwrap()
    }

    #[inline]
    fn reg_mem_to_gpr_mem(&mut self, rm: &RegMem) -> GprMem {
        GprMem::new(rm.clone()).unwrap()
    }

    #[inline]
    fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
        GprMem::new(RegMem::reg(r)).unwrap()
    }

    #[inline]
    fn imm8_reg_to_imm8_gpr(&mut self, ir: &Imm8Reg) -> Imm8Gpr {
        Imm8Gpr::new(ir.clone()).unwrap()
    }

    #[inline]
    fn gpr_to_gpr_mem(&mut self, gpr: Gpr) -> GprMem {
        GprMem::from(gpr)
    }

    #[inline]
    fn gpr_to_gpr_mem_imm(&mut self, gpr: Gpr) -> GprMemImm {
        GprMemImm::from(gpr)
    }

    #[inline]
    fn gpr_to_imm8_gpr(&mut self, gpr: Gpr) -> Imm8Gpr {
        Imm8Gpr::from(gpr)
    }

    #[inline]
    fn imm8_to_imm8_gpr(&mut self, imm: u8) -> Imm8Gpr {
        Imm8Gpr::new(Imm8Reg::Imm8 { imm }).unwrap()
    }

    #[inline]
    fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
        if is_int_or_ref_ty(ty) || ty == I128 {
            Some(RegisterClass::Gpr {
                single_register: ty != I128,
            })
        } else if ty == F32 || ty == F64 || (ty.is_vector() && ty.bits() == 128) {
            Some(RegisterClass::Xmm)
        } else {
            None
        }
    }

    #[inline]
    fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
        match ty {
            types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => Some(()),
            types::R32 => panic!("shouldn't have 32-bit refs on x64"),
            _ => None,
        }
    }

    #[inline]
    fn intcc_without_eq(&mut self, x: &IntCC) -> IntCC {
        x.without_equal()
    }

    #[inline]
    fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
        CC::from_intcc(*intcc)
    }

    #[inline]
    fn cc_invert(&mut self, cc: &CC) -> CC {
        cc.invert()
    }

    #[inline]
    fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> {
        match cc {
            CC::Z => Some(*cc),
            CC::NZ => Some(*cc),
            _ => None,
        }
    }

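    /// Returns `Some(sum)` when `offset` plus the zero-extension of
    /// `constant_value` (from `extend_from_ty`'s width) fits in a
    /// sign-extended 32-bit immediate.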
    #[inline]
    fn sum_extend_fits_in_32_bits(
        &mut self,
        extend_from_ty: Type,
        constant_value: Imm64,
        offset: Offset32,
    ) -> Option<u32> {
        let offset: i64 = offset.into();
        let constant_value: u64 = constant_value.bits() as u64;
        // Zero-extend the constant from `extend_from_ty`'s width by shifting
        // the upper bits out with a logical shift.
        let shift = 64 - extend_from_ty.bits();
        let zero_extended_constant_value = (constant_value << shift) >> shift;
        let sum = offset.wrapping_add(zero_extended_constant_value as i64);
        // The sum is usable only if it round-trips through a sign-extended
        // 32-bit value.
        if sum == ((sum << 32) >> 32) {
            Some(sum as u32)
        } else {
            None
        }
    }

    #[inline]
    fn amode_offset(&mut self, addr: &Amode, offset: u32) -> Amode {
        addr.offset(offset)
    }

    #[inline]
    fn zero_offset(&mut self) -> Offset32 {
        Offset32::new(0)
    }

    #[inline]
    fn atomic_rmw_op_to_mach_atomic_rmw_op(&mut self, op: &AtomicRmwOp) -> MachAtomicRmwOp {
        MachAtomicRmwOp::from(*op)
    }

    #[inline]
    fn preg_rbp(&mut self) -> PReg {
        regs::rbp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_rsp(&mut self) -> PReg {
        regs::rsp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_pinned(&mut self) -> PReg {
        regs::pinned_reg().to_real_reg().unwrap().into()
    }

    fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg {
        let call_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs());
        let ret_ty = libcall.signature(call_conv).returns[0].value_type;
        let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap();

        emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            libcall.clone(),
            &[a],
            &[output_reg],
        )
        .expect("Failed to emit LibCall");

        output_reg.to_reg()
    }

    fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg {
        let call_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs());
        let ret_ty = libcall.signature(call_conv).returns[0].value_type;
        let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap();

        emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            libcall.clone(),
            &[a, b, c],
            &[output_reg],
        )
        .expect("Failed to emit LibCall");

        output_reg.to_reg()
    }

    #[inline]
    fn single_target(&mut self, targets: &MachLabelSlice) -> Option<MachLabel> {
        if targets.len() == 1 {
            Some(targets[0])
        } else {
            None
        }
    }

    #[inline]
    fn two_targets(&mut self, targets: &MachLabelSlice) -> Option<(MachLabel, MachLabel)> {
        if targets.len() == 2 {
            Some((targets[0], targets[1]))
        } else {
            None
        }
    }

    #[inline]
    fn jump_table_targets(
        &mut self,
        targets: &MachLabelSlice,
    ) -> Option<(MachLabel, BoxVecMachLabel)> {
        if targets.is_empty() {
            return None;
        }

        // The first label is the default target; the rest form the jump
        // table proper.
        let default_label = targets[0];
        let jt_targets = Box::new(SmallVec::from(&targets[1..]));
        Some((default_label, jt_targets))
    }

    #[inline]
    fn jump_table_size(&mut self, targets: &BoxVecMachLabel) -> u32 {
        targets.len() as u32
    }

    #[inline]
    fn vconst_all_ones_or_all_zeros(&mut self, constant: Constant) -> Option<()> {
        let const_data = self.lower_ctx.get_constant_data(constant);
        if const_data.iter().all(|&b| b == 0 || b == 0xFF) {
            return Some(());
        }
        None
    }

    #[inline]
    fn fcvt_uint_mask_const(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&UINT_MASK))
    }

    #[inline]
    fn fcvt_uint_mask_high_const(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH))
    }

    #[inline]
    fn iadd_pairwise_mul_const_16(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_MUL_CONST_16))
    }

    #[inline]
    fn iadd_pairwise_mul_const_32(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_MUL_CONST_32))
    }

    #[inline]
    fn iadd_pairwise_xor_const_32(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_XOR_CONST_32))
    }

    #[inline]
    fn iadd_pairwise_addd_const_32(&mut self) -> VCodeConstant {
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_ADDD_CONST_32))
    }

    #[inline]
    fn snarrow_umax_mask(&mut self) -> VCodeConstant {
        // 2147483647.0 (i32::MAX as f64) is 0x41DFFFFFFFC00000, repeated in
        // both f64 lanes.
        static UMAX_MASK: [u8; 16] = [
            0x00, 0x00, 0xC0, 0xFF, 0xFF, 0xFF, 0xDF, 0x41, 0x00, 0x00, 0xC0, 0xFF, 0xFF, 0xFF,
            0xDF, 0x41,
        ];
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&UMAX_MASK))
    }

    #[inline]
    fn shuffle_0_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        // A `pshufb` index with its high bit set writes a zero byte, so
        // out-of-range lanes are mapped to 0b10000000.
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_0_15_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_16_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| b.wrapping_sub(16))
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn perm_from_mask_with_zeros(
        &mut self,
        mask: &VecMask,
    ) -> Option<(VCodeConstant, VCodeConstant)> {
        // Only applicable when at least one lane must be zeroed.
        if !mask.iter().any(|&b| b > 31) {
            return None;
        }

        let zeros = mask
            .iter()
            .map(|&b| if b > 31 { 0x00 } else { 0xff })
            .collect();

        Some((
            self.perm_from_mask(mask),
            self.lower_ctx
                .use_constant(VCodeConstantData::Generated(zeros)),
        ))
    }

    #[inline]
    fn perm_from_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask.iter().cloned().collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn swizzle_zero_mask(&mut self) -> VCodeConstant {
        // Adding 0x70 with saturation pushes any out-of-range swizzle index
        // (16..=255) to 0x80 or above, which `pshufb` then turns into a zero
        // byte.
        static ZERO_MASK_VALUE: [u8; 16] = [0x70; 16];
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&ZERO_MASK_VALUE))
    }

    #[inline]
    fn sqmul_round_sat_mask(&mut self) -> VCodeConstant {
        // Each i16 lane holds 0x8000: `pmulhrsw` produces 0x8000 only in the
        // single overflowing case (i16::MIN * i16::MIN), which this mask
        // detects so the result can be corrected.
        static SAT_MASK: [u8; 16] = [
            0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
            0x00, 0x80,
        ];
        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&SAT_MASK))
    }

    #[inline]
    fn uunarrow_umax_mask(&mut self) -> VCodeConstant {
        // 4294967295.0 (u32::MAX as f64) is 0x41EFFFFFFFE00000, repeated in
        // both f64 lanes.
        static UMAX_MASK: [u8; 16] = [
            0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, 0xEF, 0x41, 0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF,
            0xEF, 0x41,
        ];

        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&UMAX_MASK))
    }

    #[inline]
    fn uunarrow_uint_mask(&mut self) -> VCodeConstant {
        // 4503599627370496.0 (2^52) is 0x4330000000000000, repeated in both
        // f64 lanes.
        static UINT_MASK: [u8; 16] = [
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x30, 0x43,
        ];

        self.lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&UINT_MASK))
    }

    fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
        match XmmMemAligned::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMem::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

    fn xmm_mem_imm_to_xmm_mem_aligned_imm(&mut self, arg: &XmmMemImm) -> XmmMemAlignedImm {
        match XmmMemAlignedImm::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMemImm::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

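    /// Returns the `pshufd` immediate when the 32-bit-lane shuffle selects
    /// all four lanes from the first operand.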
    fn pshufd_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufd_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        // When selecting only from the right-hand operand, all indices must
        // be in 4..8; `checked_sub` bails out via `?` if any index selects
        // from the left-hand operand instead.
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        // `shufps` takes its low two output lanes from the first operand and
        // its high two from the second, so `c` and `d` must select from the
        // second operand (indices 4..8).
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_rev_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        // Same as `shufps_imm`, but with the operands swapped: `a` and `b`
        // must select from the second operand.
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        // `pshuflw` shuffles the low four 16-bit lanes and passes the high
        // four through unchanged.
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(8)?;
        let f = f.checked_sub(8)?;
        let g = g.checked_sub(8)?;
        let h = h.checked_sub(8)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufhw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        // `pshufhw` shuffles the high four 16-bit lanes and passes the low
        // four through unchanged.
        let e = e.checked_sub(4)?;
        let f = f.checked_sub(4)?;
        let g = g.checked_sub(4)?;
        let h = h.checked_sub(4)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn pshufhw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        // Same as above, but all lanes select from the second operand, so
        // shift every index down by 8 (and the high lanes by a further 4).
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(12)?;
        let f = f.checked_sub(12)?;
        let g = g.checked_sub(12)?;
        let h = h.checked_sub(12)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn palignr_imm_from_immediate(&mut self, imm: Immediate) -> Option<u8> {
        let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();

        // `palignr` extracts a contiguous run of bytes from its concatenated
        // inputs, so the mask must be a consecutive ascending sequence.
        if bytes.windows(2).all(|a| a[0] + 1 == a[1]) {
            Some(bytes[0])
        } else {
            None
        }
    }

    fn pblendw_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;

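        // `pblendw` can only copy lane `i` of one of its two inputs to lane
        // `i` of the output. Each index must therefore equal its position
        // (mod 8); choosing the second input sets bit `i` of the immediate.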
        let bit = |x: u8, c: u8| {
            if x % 8 == c {
                if x < 8 {
                    Some(0)
                } else {
                    Some(1 << c)
                }
            } else {
                None
            }
        };
        Some(
            bit(a, 0)?
                | bit(b, 1)?
                | bit(c, 2)?
                | bit(d, 3)?
                | bit(e, 4)?
                | bit(f, 5)?
                | bit(g, 6)?
                | bit(h, 7)?,
        )
    }

    fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
        XmmMemImm::new(RegMemImm::imm(imm)).unwrap()
    }
}

impl IsleContext<'_, '_, MInst, X64Backend> {
    isle_prelude_method_helpers!(X64Caller);

    fn load_xmm_unaligned(&mut self, addr: SyntheticAmode) -> Xmm {
        // Load the value into a fresh XMM temporary with an unaligned move
        // (`movdqu`), so it can be used where an aligned operand is required.
        let tmp = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        self.lower_ctx.emit(MInst::XmmUnaryRmRUnaligned {
            op: SseOpcode::Movdqu,
            src: XmmMem::new(RegMem::mem(addr)).unwrap(),
            dst: Writable::from_reg(Xmm::new(tmp.to_reg()).unwrap()),
        });
        Xmm::new(tmp.to_reg()).unwrap()
    }
}

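// Each 16-byte row of this table holds the mask for shifting an `i8x16` value
// left by the row's index: row `amt` repeats `0xff << amt`, clearing the bits
// that a 16-bit-lane shift would otherwise smear across byte boundaries.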
#[rustfmt::skip]
const I8X16_ISHL_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
    0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
    0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
    0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
    0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
];

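// As above, but for logical right shifts: row `amt` repeats `0xff >> amt`.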
#[rustfmt::skip]
const I8X16_USHR_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
];

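// Lookup table mapping each 4-bit value to its population count; used with
// `pshufb` to compute a vectorized popcount.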
#[rustfmt::skip]
const POPCOUNT_4BIT_TABLE: [u8; 16] = [
    0x00, 0x01, 0x01, 0x02,
    0x01, 0x02, 0x02, 0x03,
    0x01, 0x02, 0x02, 0x03,
    0x02, 0x03, 0x03, 0x04,
];

const POPCOUNT_LOW_MASK: [u8; 16] = [0x0f; 16];

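/// Attempts to fit `constant` into an immediate that will be sign-extended
/// from 32 to 64 bits.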
#[inline]
fn to_simm32(constant: i64) -> Option<GprMemImm> {
    if constant == ((constant << 32) >> 32) {
        Some(
            GprMemImm::new(RegMemImm::Imm {
                simm32: constant as u32,
            })
            .unwrap(),
        )
    } else {
        None
    }
}

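// Masks used when lowering unsigned integer-to-float conversions: 0x43300000
// is the upper half of 0x4330000000000000, the f64 encoding of 2^52.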
const UINT_MASK: [u8; 16] = [
    0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
];

const UINT_MASK_HIGH: [u8; 16] = [
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43,
];

const IADD_PAIRWISE_MUL_CONST_16: [u8; 16] = [0x01; 16];

const IADD_PAIRWISE_MUL_CONST_32: [u8; 16] = [
    0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
];

const IADD_PAIRWISE_XOR_CONST_32: [u8; 16] = [
    0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
];

const IADD_PAIRWISE_ADDD_CONST_32: [u8; 16] = [
    0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
];
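
// A minimal sanity-check sketch for the pure helper `to_simm32`: a value is
// accepted exactly when it round-trips through sign-extension from 32 bits.
// (This test module and its names are an assumed, illustrative addition.)
#[cfg(test)]
mod to_simm32_tests {
    use super::*;

    #[test]
    fn accepts_values_that_sign_extend_from_32_bits() {
        assert!(to_simm32(0).is_some());
        assert!(to_simm32(i64::from(i32::MAX)).is_some());
        assert!(to_simm32(i64::from(i32::MIN)).is_some());
    }

    #[test]
    fn rejects_values_outside_the_signed_32_bit_range() {
        assert!(to_simm32(i64::from(i32::MAX) + 1).is_none());
        assert!(to_simm32(i64::from(i32::MIN) - 1).is_none());
    }
}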