cranelift_codegen/isa/x64/encoding/
rex.rs

1//! Encodes instructions in the standard x86 encoding mode. This is called IA-32E mode in the Intel
2//! manuals but corresponds to the addition of the REX-prefix format (hence the name of this module)
3//! that allowed encoding instructions in both compatibility mode (32-bit instructions running on a
4//! 64-bit OS) and in 64-bit mode (using the full 64-bit address space).
5//!
6//! For all of the routines that take both a memory-or-reg operand (sometimes called "E" in the
7//! Intel documentation, see the Intel Developer's manual, vol. 2, section A.2) and a reg-only
8//! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following
9//! means "hardware register encoding number".
10
11use crate::machinst::{Reg, RegClass};
12use crate::{
13    ir::TrapCode,
14    isa::x64::inst::{
15        args::{Amode, OperandSize},
16        regs, Inst, LabelUse,
17    },
18    machinst::MachBuffer,
19};
20
/// Returns `true` when `x`, reinterpreted as a signed 32-bit value and widened to 64 bits, is
/// exactly what sign-extending its lowest byte would produce.
pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool {
    let widened = i64::from(x as i32);
    // Truncating to `i8` and widening again performs the 8 -> 64 bit sign extension.
    widened == i64::from(widened as i8)
}
25
/// Returns `true` when `x`, reinterpreted as a signed 32-bit value, is exactly what
/// sign-extending its lowest byte would produce (i.e. it fits in a signed 8-bit immediate).
pub(crate) fn low8_will_sign_extend_to_32(x: u32) -> bool {
    let signed = x as i32;
    // Truncating to `i8` and widening again performs the 8 -> 32 bit sign extension.
    signed == i32::from(signed as i8)
}
30
/// Encode the ModR/M byte.
///
/// The layout is `mod` in bits 7..6, the G register encoding in bits 5..3 and the E (r/m)
/// encoding in bits 2..0. Callers must pass already-truncated fields; this is checked in debug
/// builds only, while release builds silently mask each field to its width.
#[inline(always)]
pub fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
    debug_assert!(m0d < 4);
    debug_assert!(enc_reg_g < 8);
    debug_assert!(rm_e < 8);
    let mod_field = (m0d & 0b11) << 6;
    let reg_field = (enc_reg_g & 0b111) << 3;
    let rm_field = rm_e & 0b111;
    mod_field | reg_field | rm_field
}
39
/// Encode the SIB byte.
///
/// The layout is the scale (`shift`) in bits 7..6, the index register encoding in bits 5..3 and
/// the base register encoding in bits 2..0. Callers must pass already-truncated fields; this is
/// checked in debug builds only, while release builds silently mask each field to its width.
#[inline(always)]
pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
    debug_assert!(shift < 4);
    debug_assert!(enc_index < 8);
    debug_assert!(enc_base < 8);
    let scale_field = (shift & 0b11) << 6;
    let index_field = (enc_index & 0b111) << 3;
    let base_field = enc_base & 0b111;
    scale_field | index_field | base_field
}
47
48/// Get the encoding number of a GPR.
49#[inline(always)]
50pub(crate) fn int_reg_enc(reg: impl Into<Reg>) -> u8 {
51    let reg = reg.into();
52    debug_assert!(reg.is_real());
53    debug_assert_eq!(reg.class(), RegClass::Int);
54    reg.to_real_reg().unwrap().hw_enc()
55}
56
57/// Get the encoding number of any register.
58#[inline(always)]
59pub(crate) fn reg_enc(reg: impl Into<Reg>) -> u8 {
60    let reg = reg.into();
61    debug_assert!(reg.is_real());
62    reg.to_real_reg().unwrap().hw_enc()
63}
64
65/// A small bit field to record a REX prefix specification:
66/// - bit 0 set to 1 indicates REX.W must be 0 (cleared).
67/// - bit 1 set to 1 indicates the REX prefix must always be emitted.
68#[repr(transparent)]
69#[derive(Clone, Copy)]
70pub(crate) struct RexFlags(u8);
71
72impl RexFlags {
73    /// By default, set the W field, and don't always emit.
74    #[inline(always)]
75    pub(crate) fn set_w() -> Self {
76        Self(0)
77    }
78    /// Creates a new RexPrefix for which the REX.W bit will be cleared.
79    #[inline(always)]
80    pub(crate) fn clear_w() -> Self {
81        Self(1)
82    }
83
84    #[inline(always)]
85    pub(crate) fn always_emit(&mut self) -> &mut Self {
86        self.0 = self.0 | 2;
87        self
88    }
89
90    #[inline(always)]
91    pub(crate) fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self {
92        let enc_reg = int_reg_enc(reg);
93        if enc_reg >= 4 && enc_reg <= 7 {
94            self.always_emit();
95        }
96        self
97    }
98
99    #[inline(always)]
100    pub(crate) fn must_clear_w(&self) -> bool {
101        (self.0 & 1) != 0
102    }
103    #[inline(always)]
104    pub(crate) fn must_always_emit(&self) -> bool {
105        (self.0 & 2) != 0
106    }
107
108    #[inline(always)]
109    pub(crate) fn emit_one_op(&self, sink: &mut MachBuffer<Inst>, enc_e: u8) {
110        // Register Operand coded in Opcode Byte
111        // REX.R and REX.X unused
112        // REX.B == 1 accesses r8-r15
113        let w = if self.must_clear_w() { 0 } else { 1 };
114        let r = 0;
115        let x = 0;
116        let b = (enc_e >> 3) & 1;
117        let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
118        if rex != 0x40 || self.must_always_emit() {
119            sink.put1(rex);
120        }
121    }
122
123    #[inline(always)]
124    pub(crate) fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
125        let w = if self.must_clear_w() { 0 } else { 1 };
126        let r = (enc_g >> 3) & 1;
127        let x = 0;
128        let b = (enc_e >> 3) & 1;
129        let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
130        if rex != 0x40 || self.must_always_emit() {
131            sink.put1(rex);
132        }
133    }
134
135    #[inline(always)]
136    pub fn emit_three_op(
137        &self,
138        sink: &mut MachBuffer<Inst>,
139        enc_g: u8,
140        enc_index: u8,
141        enc_base: u8,
142    ) {
143        let w = if self.must_clear_w() { 0 } else { 1 };
144        let r = (enc_g >> 3) & 1;
145        let x = (enc_index >> 3) & 1;
146        let b = (enc_base >> 3) & 1;
147        let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
148        if rex != 0x40 || self.must_always_emit() {
149            sink.put1(rex);
150        }
151    }
152}
153
154/// Generate the proper Rex flags for the given operand size.
155impl From<OperandSize> for RexFlags {
156    fn from(size: OperandSize) -> Self {
157        match size {
158            OperandSize::Size64 => RexFlags::set_w(),
159            _ => RexFlags::clear_w(),
160        }
161    }
162}
163/// Generate Rex flags for an OperandSize/register tuple.
164impl From<(OperandSize, Reg)> for RexFlags {
165    fn from((size, reg): (OperandSize, Reg)) -> Self {
166        let mut rex = RexFlags::from(size);
167        if size == OperandSize::Size8 {
168            rex.always_emit_if_8bit_needed(reg);
169        }
170        rex
171    }
172}
173
/// Selects one of the "opcode maps", which let the same opcode byte be reused for different
/// instruction encodings. See appendix A in the Intel Software Developer's Manual, volume 2A, for
/// more details.
#[allow(missing_docs)]
#[derive(PartialEq)]
pub enum OpcodeMap {
    None,
    _0F,
    _0F38,
    _0F3A,
}

impl OpcodeMap {
    /// Returns the two-bit code for this map.
    ///
    /// Normally the opcode map is specified as bytes in the instruction, but some x64 encoding
    /// formats pack this information as bits in a prefix (e.g. VEX / EVEX).
    pub(crate) fn bits(&self) -> u8 {
        match *self {
            Self::None => 0b00,
            Self::_0F => 0b01,
            Self::_0F38 => 0b10,
            Self::_0F3A => 0b11,
        }
    }
}

/// The default is `OpcodeMap::None`.
impl Default for OpcodeMap {
    fn default() -> Self {
        OpcodeMap::None
    }
}
203
204/// We may need to include one or more legacy prefix bytes before the REX prefix.  This enum
205/// covers only the small set of possibilities that we actually need.
206#[derive(PartialEq)]
207pub enum LegacyPrefixes {
208    /// No prefix bytes.
209    None,
210    /// Operand Size Override -- here, denoting "16-bit operation".
211    _66,
212    /// The Lock prefix.
213    _F0,
214    /// Operand size override and Lock.
215    _66F0,
216    /// REPNE, but no specific meaning here -- is just an opcode extension.
217    _F2,
218    /// REP/REPE, but no specific meaning here -- is just an opcode extension.
219    _F3,
220    /// Operand size override and same effect as F3.
221    _66F3,
222}
223
224impl LegacyPrefixes {
225    /// Emit the legacy prefix as bytes (e.g. in REX instructions).
226    #[inline(always)]
227    pub(crate) fn emit(&self, sink: &mut MachBuffer<Inst>) {
228        match self {
229            Self::_66 => sink.put1(0x66),
230            Self::_F0 => sink.put1(0xF0),
231            Self::_66F0 => {
232                // I don't think the order matters, but in any case, this is the same order that
233                // the GNU assembler uses.
234                sink.put1(0x66);
235                sink.put1(0xF0);
236            }
237            Self::_F2 => sink.put1(0xF2),
238            Self::_F3 => sink.put1(0xF3),
239            Self::_66F3 => {
240                sink.put1(0x66);
241                sink.put1(0xF3);
242            }
243            Self::None => (),
244        }
245    }
246
247    /// Emit the legacy prefix as bits (e.g. for EVEX instructions).
248    #[inline(always)]
249    pub(crate) fn bits(&self) -> u8 {
250        match self {
251            Self::None => 0b00,
252            Self::_66 => 0b01,
253            Self::_F3 => 0b10,
254            Self::_F2 => 0b11,
255            _ => panic!(
256                "VEX and EVEX bits can only be extracted from single prefixes: None, 66, F3, F2"
257            ),
258        }
259    }
260}
261
262impl Default for LegacyPrefixes {
263    fn default() -> Self {
264        Self::None
265    }
266}
267
268/// This is the core 'emit' function for instructions that reference memory.
269///
270/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
271/// create and emit:
272/// - first the legacy prefixes, if any
273/// - then the REX prefix, if needed
274/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
275/// - then the MOD/RM byte,
276/// - then optionally, a SIB byte,
277/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
278///
279/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
280/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
281/// instructions will require their own emitter functions.
282///
283/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided.
284///
285/// The opcodes are written bigendianly for the convenience of callers.  For example, if the opcode
286/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` ==
287/// 0xF3_0F_27 and `num_opcodes` == 3.
288///
289/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
290/// `rex` can specify special handling for the REX prefix.  By default, the REX prefix will
291/// indicate a 64-bit operation and will be deleted if it is redundant (0x40).  Note that for a
292/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
293/// indicate a 64-bit operation.
294pub(crate) fn emit_std_enc_mem(
295    sink: &mut MachBuffer<Inst>,
296    prefixes: LegacyPrefixes,
297    opcodes: u32,
298    mut num_opcodes: usize,
299    enc_g: u8,
300    mem_e: &Amode,
301    rex: RexFlags,
302    bytes_at_end: u8,
303) {
304    // General comment for this function: the registers in `mem_e` must be
305    // 64-bit integer registers, because they are part of an address
306    // expression.  But `enc_g` can be derived from a register of any class.
307
308    let can_trap = mem_e.can_trap();
309    if can_trap {
310        sink.add_trap(TrapCode::HeapOutOfBounds);
311    }
312
313    prefixes.emit(sink);
314
315    // After prefixes, first emit the REX byte depending on the kind of
316    // addressing mode that's being used.
317    match *mem_e {
318        Amode::ImmReg { base, .. } => {
319            let enc_e = int_reg_enc(base);
320            rex.emit_two_op(sink, enc_g, enc_e);
321        }
322
323        Amode::ImmRegRegShift {
324            base: reg_base,
325            index: reg_index,
326            ..
327        } => {
328            let enc_base = int_reg_enc(*reg_base);
329            let enc_index = int_reg_enc(*reg_index);
330            rex.emit_three_op(sink, enc_g, enc_index, enc_base);
331        }
332
333        Amode::RipRelative { .. } => {
334            // note REX.B = 0.
335            rex.emit_two_op(sink, enc_g, 0);
336        }
337    }
338
339    // Now the opcode(s).  These include any other prefixes the caller
340    // hands to us.
341    while num_opcodes > 0 {
342        num_opcodes -= 1;
343        sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
344    }
345
346    // And finally encode the mod/rm bytes and all further information.
347    emit_modrm_sib_disp(sink, enc_g, mem_e, bytes_at_end)
348}
349
350pub(crate) fn emit_modrm_sib_disp(
351    sink: &mut MachBuffer<Inst>,
352    enc_g: u8,
353    mem_e: &Amode,
354    bytes_at_end: u8,
355) {
356    match *mem_e {
357        Amode::ImmReg { simm32, base, .. } => {
358            let enc_e = int_reg_enc(base);
359
360            // Now the mod/rm and associated immediates.  This is
361            // significantly complicated due to the multiple special cases.
362            if simm32 == 0
363                && enc_e != regs::ENC_RSP
364                && enc_e != regs::ENC_RBP
365                && enc_e != regs::ENC_R12
366                && enc_e != regs::ENC_R13
367            {
368                // FIXME JRS 2020Feb11: those four tests can surely be
369                // replaced by a single mask-and-compare check.  We should do
370                // that because this routine is likely to be hot.
371                sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7));
372            } else if simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) {
373                sink.put1(encode_modrm(0, enc_g & 7, 4));
374                sink.put1(0x24);
375            } else if low8_will_sign_extend_to_32(simm32)
376                && enc_e != regs::ENC_RSP
377                && enc_e != regs::ENC_R12
378            {
379                sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7));
380                sink.put1((simm32 & 0xFF) as u8);
381            } else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 {
382                sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7));
383                sink.put4(simm32);
384            } else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12)
385                && low8_will_sign_extend_to_32(simm32)
386            {
387                // REX.B distinguishes RSP from R12
388                sink.put1(encode_modrm(1, enc_g & 7, 4));
389                sink.put1(0x24);
390                sink.put1((simm32 & 0xFF) as u8);
391            } else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP {
392                //.. wait for test case for RSP case
393                // REX.B distinguishes RSP from R12
394                sink.put1(encode_modrm(2, enc_g & 7, 4));
395                sink.put1(0x24);
396                sink.put4(simm32);
397            } else {
398                unreachable!("ImmReg");
399            }
400        }
401
402        Amode::ImmRegRegShift {
403            simm32,
404            base: reg_base,
405            index: reg_index,
406            shift,
407            ..
408        } => {
409            let enc_base = int_reg_enc(*reg_base);
410            let enc_index = int_reg_enc(*reg_index);
411
412            // modrm, SIB, immediates.
413            if low8_will_sign_extend_to_32(simm32) && enc_index != regs::ENC_RSP {
414                sink.put1(encode_modrm(1, enc_g & 7, 4));
415                sink.put1(encode_sib(shift, enc_index & 7, enc_base & 7));
416                sink.put1(simm32 as u8);
417            } else if enc_index != regs::ENC_RSP {
418                sink.put1(encode_modrm(2, enc_g & 7, 4));
419                sink.put1(encode_sib(shift, enc_index & 7, enc_base & 7));
420                sink.put4(simm32);
421            } else {
422                panic!("ImmRegRegShift");
423            }
424        }
425
426        Amode::RipRelative { ref target } => {
427            // RIP-relative is mod=00, rm=101.
428            sink.put1(encode_modrm(0, enc_g & 7, 0b101));
429
430            let offset = sink.cur_offset();
431            sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
432            // N.B.: some instructions (XmmRmRImm format for example)
433            // have bytes *after* the RIP-relative offset. The
434            // addressed location is relative to the end of the
435            // instruction, but the relocation is nominally relative
436            // to the end of the u32 field. So, to compensate for
437            // this, we emit a negative extra offset in the u32 field
438            // initially, and the relocation will add to it.
439            sink.put4(-(bytes_at_end as i32) as u32);
440        }
441    }
442}
443
444/// This is the core 'emit' function for instructions that do not reference memory.
445///
446/// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E
447/// operand is a register rather than memory.  Hence it is much simpler.
448pub(crate) fn emit_std_enc_enc(
449    sink: &mut MachBuffer<Inst>,
450    prefixes: LegacyPrefixes,
451    opcodes: u32,
452    mut num_opcodes: usize,
453    enc_g: u8,
454    enc_e: u8,
455    rex: RexFlags,
456) {
457    // EncG and EncE can be derived from registers of any class, and they
458    // don't even have to be from the same class.  For example, for an
459    // integer-to-FP conversion insn, one might be RegClass::I64 and the other
460    // RegClass::V128.
461
462    // The legacy prefixes.
463    prefixes.emit(sink);
464
465    // The rex byte.
466    rex.emit_two_op(sink, enc_g, enc_e);
467
468    // All other prefixes and opcodes.
469    while num_opcodes > 0 {
470        num_opcodes -= 1;
471        sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
472    }
473
474    // Now the mod/rm byte.  The instruction we're generating doesn't access
475    // memory, so there is no SIB byte or immediate -- we're done.
476    sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7));
477}
478
479// These are merely wrappers for the above two functions that facilitate passing
480// actual `Reg`s rather than their encodings.
481
482pub(crate) fn emit_std_reg_mem(
483    sink: &mut MachBuffer<Inst>,
484    prefixes: LegacyPrefixes,
485    opcodes: u32,
486    num_opcodes: usize,
487    reg_g: Reg,
488    mem_e: &Amode,
489    rex: RexFlags,
490    bytes_at_end: u8,
491) {
492    let enc_g = reg_enc(reg_g);
493    emit_std_enc_mem(
494        sink,
495        prefixes,
496        opcodes,
497        num_opcodes,
498        enc_g,
499        mem_e,
500        rex,
501        bytes_at_end,
502    );
503}
504
505pub(crate) fn emit_std_reg_reg(
506    sink: &mut MachBuffer<Inst>,
507    prefixes: LegacyPrefixes,
508    opcodes: u32,
509    num_opcodes: usize,
510    reg_g: Reg,
511    reg_e: Reg,
512    rex: RexFlags,
513) {
514    let enc_g = reg_enc(reg_g);
515    let enc_e = reg_enc(reg_e);
516    emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
517}
518
519/// Write a suitable number of bits from an imm64 to the sink.
520pub(crate) fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
521    match size {
522        8 | 4 => sink.put4(simm32),
523        2 => sink.put2(simm32 as u16),
524        1 => sink.put1(simm32 as u8),
525        _ => unreachable!(),
526    }
527}