cranelift_codegen/isa/x64/encoding/
evex.rs

1//! Encodes EVEX instructions. These instructions are those added by the AVX-512 extensions. The
2//! EVEX encoding requires a 4-byte prefix:
3//!
4//! Byte 0:  0x62
5//!         ┌───┬───┬───┬───┬───┬───┬───┬───┐
6//! Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
7//!         ├───┼───┼───┼───┼───┼───┼───┼───┤
8//! Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
9//!         ├───┼───┼───┼───┼───┼───┼───┼───┤
10//! Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
11//!         └───┴───┴───┴───┴───┴───┴───┴───┘
12//!
//! The prefix is then followed by the opcode byte, the ModR/M byte, and other optional suffixes
14//! (e.g. SIB byte, displacements, immediates) based on the instruction (see section 2.6, Intel
15//! Software Development Manual, volume 2A).
16use super::rex::{encode_modrm, LegacyPrefixes, OpcodeMap};
17use super::ByteSink;
18use core::ops::RangeInclusive;
19
/// Constructs an EVEX-encoded instruction using a builder pattern. This approach makes it visually
/// easier to transform something like the manual's syntax, `EVEX.256.66.0F38.W1 1F /r`, to code:
/// `EvexInstruction::new().length(...).prefix(...).map(...).w(true).opcode(0x1F).reg(...).rm(...)`.
pub struct EvexInstruction {
    /// The 4-byte EVEX prefix packed into a single `u32`; emitted first by `encode`.
    bits: u32,
    /// The opcode byte, emitted right after the prefix.
    opcode: u8,
    /// Register whose low three bits are placed in the ModR/M `reg` field.
    reg: Register,
    /// Register whose low three bits are placed in the ModR/M `rm` field.
    rm: Register,
}
29
30/// Because some of the bit flags in the EVEX prefix are reversed and users of `EvexInstruction` may
31/// choose to skip setting fields, here we set some sane defaults. Note that:
32/// - the first byte is always `0x62` but you will notice it at the end of the default `bits` value
33///   implemented--remember the little-endian order
34/// - some bits are always set to certain values: bits 10-11 to 0, bit 18 to 1
35/// - the other bits set correspond to reversed bits: R, X, B, R' (byte 1), vvvv (byte 2), V' (byte
36///   3).
37///
38/// See the `default_emission` test for what these defaults are equivalent to (e.g. using RAX,
39/// unsetting the W bit, etc.)
40impl Default for EvexInstruction {
41    fn default() -> Self {
42        Self {
43            bits: 0x08_7C_F0_62,
44            opcode: 0,
45            reg: Register::default(),
46            rm: Register::default(),
47        }
48    }
49}
50
51#[allow(non_upper_case_globals)] // This makes it easier to match the bit range names to the manual's names.
52impl EvexInstruction {
53    /// Construct a default EVEX instruction.
54    pub fn new() -> Self {
55        Self::default()
56    }
57
58    /// Set the length of the instruction . Note that there are sets of instructions (i.e. rounding,
59    /// memory broadcast) that modify the same underlying bits--at some point (TODO) we can add a
60    /// way to set those context bits and verify that both are not used (e.g. rounding AND length).
61    /// For now, this method is very convenient.
62    #[inline(always)]
63    pub fn length(mut self, length: EvexVectorLength) -> Self {
64        self.write(Self::LL, EvexContext::Other { length }.bits() as u32);
65        self
66    }
67
68    /// Set the legacy prefix byte of the instruction: None | 66 | F0 | F2 | F3. EVEX instructions
69    /// pack these into the prefix, not as separate bytes.
70    #[inline(always)]
71    pub fn prefix(mut self, prefix: LegacyPrefixes) -> Self {
72        self.write(Self::pp, prefix.bits() as u32);
73        self
74    }
75
76    /// Set the opcode map byte of the instruction: None | 0F | 0F38 | 0F3A. EVEX instructions pack
77    /// these into the prefix, not as separate bytes.
78    #[inline(always)]
79    pub fn map(mut self, map: OpcodeMap) -> Self {
80        self.write(Self::mm, map.bits() as u32);
81        self
82    }
83
84    /// Set the W bit, typically used to indicate an instruction using 64 bits of an operand (e.g.
85    /// 64 bit lanes). EVEX packs this bit in the EVEX prefix; previous encodings used the REX
86    /// prefix.
87    #[inline(always)]
88    pub fn w(mut self, w: bool) -> Self {
89        self.write(Self::W, w as u32);
90        self
91    }
92
93    /// Set the instruction opcode byte.
94    #[inline(always)]
95    pub fn opcode(mut self, opcode: u8) -> Self {
96        self.opcode = opcode;
97        self
98    }
99
100    /// Set the register to use for the `reg` bits; many instructions use this as the write operand.
101    /// Setting this affects both the ModRM byte (`reg` section) and the EVEX prefix (the extension
102    /// bits for register encodings > 8).
103    #[inline(always)]
104    pub fn reg(mut self, reg: impl Into<Register>) -> Self {
105        self.reg = reg.into();
106        let r = !(self.reg.0 >> 3) & 1;
107        let r_ = !(self.reg.0 >> 4) & 1;
108        self.write(Self::R, r as u32);
109        self.write(Self::R_, r_ as u32);
110        self
111    }
112
113    /// Set the mask to use. See section 2.6 in the Intel Software Developer's Manual, volume 2A for
114    /// more details.
115    #[allow(dead_code)]
116    #[inline(always)]
117    pub fn mask(mut self, mask: EvexMasking) -> Self {
118        self.write(Self::aaa, mask.aaa_bits() as u32);
119        self.write(Self::z, mask.z_bit() as u32);
120        self
121    }
122
123    /// Set the `vvvvv` register; some instructions allow using this as a second, non-destructive
124    /// source register in 3-operand instructions (e.g. 2 read, 1 write).
125    #[allow(dead_code)]
126    #[inline(always)]
127    pub fn vvvvv(mut self, reg: impl Into<Register>) -> Self {
128        let reg = reg.into();
129        self.write(Self::vvvv, !(reg.0 as u32) & 0b1111);
130        self.write(Self::V_, !(reg.0 as u32 >> 4) & 0b1);
131        self
132    }
133
134    /// Set the register to use for the `rm` bits; many instructions use this as the "read from
135    /// register/memory" operand. Currently this does not support memory addressing (TODO).Setting
136    /// this affects both the ModRM byte (`rm` section) and the EVEX prefix (the extension bits for
137    /// register encodings > 8).
138    #[inline(always)]
139    pub fn rm(mut self, reg: impl Into<Register>) -> Self {
140        self.rm = reg.into();
141        let b = !(self.rm.0 >> 3) & 1;
142        let x = !(self.rm.0 >> 4) & 1;
143        self.write(Self::X, x as u32);
144        self.write(Self::B, b as u32);
145        self
146    }
147
148    /// Emit the EVEX-encoded instruction to the code sink:
149    /// - first, the 4-byte EVEX prefix;
150    /// - then, the opcode byte;
151    /// - finally, the ModR/M byte.
152    ///
153    /// Eventually this method should support encodings of more than just the reg-reg addressing mode (TODO).
154    pub fn encode<CS: ByteSink + ?Sized>(&self, sink: &mut CS) {
155        sink.put4(self.bits);
156        sink.put1(self.opcode);
157        sink.put1(encode_modrm(3, self.reg.0 & 7, self.rm.0 & 7));
158    }
159
160    // In order to simplify the encoding of the various bit ranges in the prefix, we specify those
161    // ranges according to the table below (extracted from the Intel Software Development Manual,
162    // volume 2A). Remember that, because we pack the 4-byte prefix into a little-endian `u32`, this
163    // chart should be read from right-to-left, top-to-bottom. Note also that we start ranges at bit
164    // 8, leaving bits 0-7 for the mandatory `0x62`.
165    //         ┌───┬───┬───┬───┬───┬───┬───┬───┐
166    // Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
167    //         ├───┼───┼───┼───┼───┼───┼───┼───┤
168    // Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
169    //         ├───┼───┼───┼───┼───┼───┼───┼───┤
170    // Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
171    //         └───┴───┴───┴───┴───┴───┴───┴───┘
172
173    // Byte 1:
174    const mm: RangeInclusive<u8> = 8..=9;
175    const R_: RangeInclusive<u8> = 12..=12;
176    const B: RangeInclusive<u8> = 13..=13;
177    const X: RangeInclusive<u8> = 14..=14;
178    const R: RangeInclusive<u8> = 15..=15;
179
180    // Byte 2:
181    const pp: RangeInclusive<u8> = 16..=17;
182    const vvvv: RangeInclusive<u8> = 19..=22;
183    const W: RangeInclusive<u8> = 23..=23;
184
185    // Byte 3:
186    const aaa: RangeInclusive<u8> = 24..=26;
187    const V_: RangeInclusive<u8> = 27..=27;
188    #[allow(dead_code)] // Will be used once broadcast and rounding controls are exposed.
189    const b: RangeInclusive<u8> = 28..=28;
190    const LL: RangeInclusive<u8> = 29..=30;
191    const z: RangeInclusive<u8> = 31..=31;
192
193    // A convenience method for writing the `value` bits to the given range in `self.bits`.
194    #[inline]
195    fn write(&mut self, range: RangeInclusive<u8>, value: u32) {
196        assert!(ExactSizeIterator::len(&range) > 0);
197        let size = range.end() - range.start() + 1; // Calculate the number of bits in the range.
198        let mask: u32 = (1 << size) - 1; // Generate a bit mask.
199        debug_assert!(
200            value <= mask,
201            "The written value should have fewer than {} bits.",
202            size
203        );
204        let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask.
205        self.bits &= mask_complement; // Clear the bits in `range`; otherwise the OR below may allow previously-set bits to slip through.
206        let value = value << *range.start(); // Place the value in the correct location (assumes `value <= mask`).
207        self.bits |= value; // Modify the bits in `range`.
208    }
209}
210
/// Describes the register index (hardware encoding) to use. This wrapper is a type-safe way to
/// pass around the registers defined in `inst/regs.rs`.
#[derive(Copy, Clone, Default)]
pub struct Register(u8);

impl From<u8> for Register {
    /// Wraps a raw register encoding. Debug builds assert that the encoding is below 16.
    fn from(reg: u8) -> Self {
        debug_assert!(reg < 16);
        Self(reg)
    }
}

// Implement `From` rather than `Into`: the standard blanket impl still provides
// `Into<u8> for Register`, so existing `.into()` callers keep working and `u8::from(reg)` becomes
// available as well.
impl From<Register> for u8 {
    /// Returns the raw register encoding.
    fn from(reg: Register) -> Self {
        reg.0
    }
}
226
227/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in
228/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be
229/// used together for certain classes of instructions; i.e., special care should be taken to ensure
230/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where
231/// opcodes can result in an #UD.
232#[allow(dead_code, missing_docs)] // Rounding and broadcast modes are not yet used.
233pub enum EvexContext {
234    RoundingRegToRegFP {
235        rc: EvexRoundingControl,
236    },
237    NoRoundingFP {
238        sae: bool,
239        length: EvexVectorLength,
240    },
241    MemoryOp {
242        broadcast: bool,
243        length: EvexVectorLength,
244    },
245    Other {
246        length: EvexVectorLength,
247    },
248}
249
250impl Default for EvexContext {
251    fn default() -> Self {
252        Self::Other {
253            length: EvexVectorLength::default(),
254        }
255    }
256}
257
258impl EvexContext {
259    /// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte.
260    pub fn bits(&self) -> u8 {
261        match self {
262            Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1,
263            Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1,
264            Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1,
265            Self::Other { length } => length.bits() << 1,
266        }
267    }
268}
269
/// The vector length selectable through the `L'` and `L` bits of the EVEX prefix; see
/// `EvexContext`.
#[allow(dead_code, missing_docs)] // Wider-length vectors are not yet used.
pub enum EvexVectorLength {
    V128,
    V256,
    V512,
}

impl Default for EvexVectorLength {
    fn default() -> Self {
        EvexVectorLength::V128
    }
}

impl EvexVectorLength {
    /// Encode the `L'` and `L` bits for merging with the P2 byte.
    fn bits(&self) -> u8 {
        // The remaining 2-bit pattern, 0b11, is reserved (#UD).
        match *self {
            EvexVectorLength::V128 => 0b00,
            EvexVectorLength::V256 => 0b01,
            EvexVectorLength::V512 => 0b10,
        }
    }
}
295
/// The rounding control selectable through the `L'` and `L` bits of the EVEX prefix; see
/// `EvexContext`.
#[allow(dead_code, missing_docs)] // Rounding controls are not yet used.
pub enum EvexRoundingControl {
    RNE,
    RD,
    RU,
    RZ,
}

impl EvexRoundingControl {
    /// Encode the `L'` and `L` bits for merging with the P2 byte.
    fn bits(&self) -> u8 {
        match *self {
            EvexRoundingControl::RNE => 0b00,
            EvexRoundingControl::RD => 0b01,
            EvexRoundingControl::RU => 0b10,
            EvexRoundingControl::RZ => 0b11,
        }
    }
}
316
/// The masking behavior of an EVEX instruction; masking support is described in section 2.6.4 of
/// the Intel Software Development Manual, volume 2A.
#[allow(dead_code, missing_docs)] // Masking is not yet used.
pub enum EvexMasking {
    None,
    Merging { k: u8 },
    Zeroing { k: u8 },
}

impl Default for EvexMasking {
    fn default() -> Self {
        Self::None
    }
}

impl EvexMasking {
    /// Encode the `z` bit for merging with the P2 byte.
    pub fn z_bit(&self) -> u8 {
        // Only zeroing-masking sets `z`; no masking and merging-masking leave it clear.
        if let Self::Zeroing { .. } = self {
            1
        } else {
            0
        }
    }

    /// Encode the `aaa` bits for merging with the P2 byte.
    pub fn aaa_bits(&self) -> u8 {
        let k = match *self {
            Self::None => return 0b000,
            Self::Merging { k } | Self::Zeroing { k } => k,
        };
        // `aaa` is a 3-bit field, so the mask register index must be at most 7.
        debug_assert!(k <= 7);
        k
    }
}
352
#[cfg(test)]
mod tests {
    use super::*;
    use crate::isa::x64::inst::regs;
    use std::vec::Vec;

    // Sanity check: the bytes produced by this EVEX encoding machinery must match the output of
    // `xed-asmparse-main 'vpabsq xmm0{k0}, xmm1'`.
    #[test]
    fn vpabsq() {
        let dst_enc = regs::xmm0().to_real_reg().unwrap().hw_enc();
        let src_enc = regs::xmm1().to_real_reg().unwrap().hw_enc();

        let mut emitted = Vec::new();
        EvexInstruction::new()
            .prefix(LegacyPrefixes::_66)
            .map(OpcodeMap::_0F38)
            .w(true)
            .opcode(0x1F)
            .reg(dst_enc)
            .rm(src_enc)
            .length(EvexVectorLength::V128)
            .encode(&mut emitted);

        let expected = vec![0x62, 0xf2, 0xfd, 0x08, 0x1f, 0xc1];
        assert_eq!(emitted, expected);
    }

    /// Check that a default-constructed instruction emits the same bytes as one built explicitly
    /// with a `0x00` opcode, the "0" register (i.e. `rax`) for both operands, and the default
    /// choice for every other parameter. This is more interesting than it may appear because some
    /// parameters have flipped-bit representations (e.g. `vvvvv`), so emitting 0s as a default
    /// will not work.
    #[test]
    fn default_emission() {
        let rax_enc = || regs::rax().to_real_reg().unwrap().hw_enc();

        let mut implicit = Vec::new();
        EvexInstruction::new().encode(&mut implicit);

        let mut explicit = Vec::new();
        EvexInstruction::new()
            .length(EvexVectorLength::V128)
            .prefix(LegacyPrefixes::None)
            .map(OpcodeMap::None)
            .w(false)
            .opcode(0x00)
            .reg(rax_enc())
            .rm(rax_enc())
            .mask(EvexMasking::None)
            .encode(&mut explicit);

        assert_eq!(implicit, explicit);
    }
}
403}