polkavm_assembler/
assembler.rs

1use alloc::vec::Vec;
2
3#[derive(Copy, Clone)]
4struct Fixup {
5    target_label: Label,
6    instruction_offset: usize,
7    instruction_length: u8,
8    fixup_offset: u8,
9    fixup_length: u8,
10}
11
12pub struct Assembler {
13    origin: u64,
14    code: Vec<u8>,
15    labels: Vec<isize>,
16    fixups: Vec<Fixup>,
17    guaranteed_capacity: usize,
18}
19
20#[allow(clippy::derivable_impls)]
21impl Default for Assembler {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27#[repr(transparent)]
28pub struct AssembledCode<'a>(&'a mut Assembler);
29
30impl<'a> core::ops::Deref for AssembledCode<'a> {
31    type Target = [u8];
32
33    #[inline]
34    fn deref(&self) -> &Self::Target {
35        &self.0.code
36    }
37}
38
39impl<'a> From<AssembledCode<'a>> for Vec<u8> {
40    fn from(code: AssembledCode<'a>) -> Vec<u8> {
41        core::mem::take(&mut code.0.code)
42    }
43}
44
45impl<'a> Drop for AssembledCode<'a> {
46    fn drop(&mut self) {
47        self.0.clear();
48    }
49}
50
51impl Assembler {
52    pub const fn new() -> Self {
53        Assembler {
54            origin: 0,
55            code: Vec::new(),
56            labels: Vec::new(),
57            fixups: Vec::new(),
58            guaranteed_capacity: 0,
59        }
60    }
61
62    pub fn set_origin(&mut self, origin: u64) {
63        self.origin = origin;
64    }
65
66    pub fn current_address(&self) -> u64 {
67        self.origin + self.code.len() as u64
68    }
69
70    pub fn forward_declare_label(&mut self) -> Label {
71        let label = self.labels.len() as u32;
72        self.labels.push(isize::MAX);
73        Label(label)
74    }
75
76    pub fn create_label(&mut self) -> Label {
77        let label = self.labels.len() as u32;
78        self.labels.push(self.code.len() as isize);
79        Label(label)
80    }
81
82    pub fn define_label(&mut self, label: Label) -> &mut Self {
83        assert_eq!(
84            self.labels[label.0 as usize],
85            isize::MAX,
86            "tried to redefine an already defined label"
87        );
88        self.labels[label.0 as usize] = self.code.len() as isize;
89        self
90    }
91
92    pub fn push_with_label<T>(&mut self, label: Label, instruction: Instruction<T>) -> &mut Self
93    where
94        T: core::fmt::Display,
95    {
96        self.define_label(label);
97        self.push(instruction)
98    }
99
100    #[inline]
101    pub fn get_label_origin_offset(&self, label: Label) -> Option<isize> {
102        let offset = self.labels[label.0 as usize];
103        if offset == isize::MAX {
104            None
105        } else {
106            Some(offset)
107        }
108    }
109
110    pub fn get_label_origin_offset_or_panic(&self, label: Label) -> isize {
111        self.get_label_origin_offset(label)
112            .expect("tried to fetch a label offset for a label that was not defined")
113    }
114
115    pub fn set_label_origin_offset(&mut self, label: Label, offset: isize) {
116        self.labels[label.0 as usize] = offset;
117    }
118
119    #[inline(always)]
120    fn add_fixup(
121        &mut self,
122        instruction_offset: usize,
123        instruction_length: usize,
124        InstFixup {
125            target_label,
126            fixup_offset,
127            fixup_length,
128        }: InstFixup,
129    ) {
130        debug_assert!((target_label.0 as usize) < self.labels.len());
131        debug_assert!(
132            (fixup_offset as usize) < instruction_length,
133            "instruction is {} bytes long and yet its target fixup starts at {}",
134            instruction_length,
135            fixup_offset
136        );
137        debug_assert!((fixup_length as usize) < instruction_length);
138        debug_assert!((fixup_offset as usize + fixup_length as usize) <= instruction_length);
139        self.fixups.push(Fixup {
140            target_label,
141            instruction_offset,
142            instruction_length: instruction_length as u8,
143            fixup_offset,
144            fixup_length,
145        });
146    }
147
148    #[inline(always)]
149    pub fn reserve<const INSTRUCTIONS: usize>(&mut self) {
150        InstBuf::reserve::<INSTRUCTIONS>(&mut self.code);
151        self.guaranteed_capacity = INSTRUCTIONS;
152    }
153
154    #[cfg_attr(not(debug_assertions), inline(always))]
155    pub fn push<T>(&mut self, instruction: Instruction<T>) -> &mut Self
156    where
157        T: core::fmt::Display,
158    {
159        #[cfg(debug_assertions)]
160        log::trace!("{:08x}: {}", self.origin + self.code.len() as u64, instruction);
161
162        if self.guaranteed_capacity == 0 {
163            InstBuf::reserve::<1>(&mut self.code);
164            self.guaranteed_capacity = 1;
165        }
166
167        let instruction_offset = self.code.len();
168
169        // SAFETY: We've reserved space for at least one instruction.
170        unsafe {
171            instruction.bytes.encode_into_vec_unsafe(&mut self.code);
172        }
173        self.guaranteed_capacity -= 1;
174
175        if let Some(fixup) = instruction.fixup {
176            self.add_fixup(instruction_offset, instruction.bytes.len(), fixup);
177        }
178
179        self
180    }
181
182    pub fn push_raw(&mut self, bytes: &[u8]) -> &mut Self {
183        self.code.extend_from_slice(bytes);
184        self
185    }
186
187    pub fn finalize(&mut self) -> AssembledCode {
188        for fixup in self.fixups.drain(..) {
189            let origin = fixup.instruction_offset + fixup.instruction_length as usize;
190            let target_absolute = self.labels[fixup.target_label.0 as usize];
191            assert_ne!(target_absolute, isize::MAX);
192            let offset = target_absolute - origin as isize;
193            let p = fixup.instruction_offset + fixup.fixup_offset as usize;
194            if fixup.fixup_length == 1 {
195                if offset > i8::MAX as isize || offset < i8::MIN as isize {
196                    panic!("out of range jump");
197                }
198                self.code[p] = offset as i8 as u8;
199            } else if fixup.fixup_length == 4 {
200                if offset > i32::MAX as isize || offset < i32::MIN as isize {
201                    panic!("out of range jump");
202                }
203                self.code[p..p + 4].copy_from_slice(&(offset as i32).to_le_bytes());
204            } else {
205                unreachable!()
206            }
207        }
208
209        AssembledCode(self)
210    }
211
212    pub fn is_empty(&self) -> bool {
213        self.code.is_empty()
214    }
215
216    pub fn len(&self) -> usize {
217        self.code.len()
218    }
219
220    pub fn code_mut(&mut self) -> &mut [u8] {
221        &mut self.code
222    }
223
224    pub fn spare_capacity(&self) -> usize {
225        self.code.capacity() - self.code.len()
226    }
227
228    pub fn resize(&mut self, size: usize, fill_with: u8) {
229        self.code.resize(size, fill_with)
230    }
231
232    pub fn reserve_code(&mut self, length: usize) {
233        self.code.reserve(length);
234    }
235
236    pub fn reserve_labels(&mut self, length: usize) {
237        self.labels.reserve(length);
238    }
239
240    pub fn reserve_fixups(&mut self, length: usize) {
241        self.fixups.reserve(length);
242    }
243
244    pub fn clear(&mut self) {
245        self.origin = 0;
246        self.code.clear();
247        self.labels.clear();
248        self.fixups.clear();
249    }
250}
251
/// A handle to a label; wraps the label's numeric index.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Label(u32);

/// Formats the label as its index wrapped in angle brackets, e.g. `<3>`.
impl core::fmt::Display for Label {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(fmt, "<{}>", self.0)
    }
}
261
262#[derive(Copy, Clone)]
263pub struct Instruction<T> {
264    pub(crate) instruction: T,
265    pub(crate) bytes: InstBuf,
266    pub(crate) fixup: Option<InstFixup>,
267}
268
269impl<T> core::fmt::Debug for Instruction<T>
270where
271    T: core::fmt::Debug,
272{
273    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
274        self.instruction.fmt(fmt)
275    }
276}
277
278impl<T> core::fmt::Display for Instruction<T>
279where
280    T: core::fmt::Display,
281{
282    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
283        self.instruction.fmt(fmt)
284    }
285}
286
287impl<T> Instruction<T> {
288    #[allow(clippy::len_without_is_empty)]
289    #[inline]
290    pub fn len(&self) -> usize {
291        self.bytes.len()
292    }
293}
294
295#[derive(Copy, Clone)]
296pub struct InstFixup {
297    pub(crate) target_label: Label,
298    pub(crate) fixup_offset: u8,
299    pub(crate) fixup_length: u8,
300}
301
/// The largest encoded instruction, in bytes, which an [`InstBuf`] can hold.
const MAXIMUM_INSTRUCTION_SIZE: usize = 16;

/// A fixed-capacity buffer holding the bytes of a single encoded instruction.
///
/// The bytes are packed into two 64-bit words so that the whole instruction can be
/// written out with two unaligned stores.
#[derive(Copy, Clone)]
pub struct InstBuf {
    /// Bytes 0 to 7; byte `i` occupies bits `8 * i` to `8 * i + 7`.
    out_1: u64,
    /// Bytes 8 to 15, packed the same way.
    out_2: u64,
    /// The number of bytes appended so far.
    length: usize,
}

#[allow(clippy::new_without_default)]
impl InstBuf {
    /// Creates an empty buffer.
    #[inline]
    pub fn new() -> Self {
        Self {
            out_1: 0,
            out_2: 0,
            length: 0,
        }
    }

    /// Returns the number of bytes appended so far.
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Appends a single byte.
    ///
    /// # Panics
    ///
    /// Panics if the buffer already holds `MAXIMUM_INSTRUCTION_SIZE` bytes. Without this
    /// check the length could grow past what the unsafe encoding routines write, which
    /// would let them expose memory past the reserved capacity.
    #[inline]
    pub fn append(&mut self, byte: u8) {
        assert!(
            self.length < MAXIMUM_INSTRUCTION_SIZE,
            "instruction exceeds the maximum instruction size of {} bytes",
            MAXIMUM_INSTRUCTION_SIZE
        );

        if self.length < 8 {
            self.out_1 |= u64::from(byte) << (self.length * 8);
        } else {
            self.out_2 |= u64::from(byte) << ((self.length - 8) * 8);
        }

        self.length += 1;
    }

    /// Appends two bytes, in order.
    #[inline]
    pub fn append2(&mut self, bytes: [u8; 2]) {
        self.append(bytes[0]);
        self.append(bytes[1]);
    }

    /// Appends four bytes, in order.
    #[inline]
    pub fn append4(&mut self, bytes: [u8; 4]) {
        self.append(bytes[0]);
        self.append(bytes[1]);
        self.append(bytes[2]);
        self.append(bytes[3]);
    }

    /// Writes both words to `output`, regardless of how many bytes were appended.
    ///
    /// # Safety
    ///
    /// `output` must be valid for writes of `MAXIMUM_INSTRUCTION_SIZE` bytes.
    #[inline]
    unsafe fn encode_into_raw(self, output: *mut u8) {
        // Convert to little-endian so that the first appended byte lands at the lowest address.
        core::ptr::write_unaligned(output.cast::<u64>(), self.out_1.to_le());
        core::ptr::write_unaligned(output.add(8).cast::<u64>(), self.out_2.to_le());
    }

    /// Appends the buffered bytes to `output` without checking its capacity in release builds.
    ///
    /// # Safety
    ///
    /// `output` must have at least `MAXIMUM_INSTRUCTION_SIZE` bytes of spare capacity.
    #[allow(clippy::debug_assert_with_mut_call)]
    #[inline]
    unsafe fn encode_into_vec_unsafe(self, output: &mut Vec<u8>) {
        debug_assert!(output.spare_capacity_mut().len() >= MAXIMUM_INSTRUCTION_SIZE);

        self.encode_into_raw(output.spare_capacity_mut().as_mut_ptr().cast());
        // `append` keeps `length` at or below `MAXIMUM_INSTRUCTION_SIZE`, so the new length
        // only covers bytes which were just written.
        let new_length = output.len() + self.length;
        output.set_len(new_length);
    }

    /// Out-of-line slow path of `reserve`.
    #[cold]
    #[inline(never)]
    fn reserve_impl(output: &mut Vec<u8>, length: usize) {
        output.reserve(length);
    }

    /// Makes sure `output` has spare capacity for `INSTRUCTIONS` maximum-sized instructions.
    #[inline(always)]
    fn reserve<const INSTRUCTIONS: usize>(output: &mut Vec<u8>) {
        let count = INSTRUCTIONS.checked_mul(MAXIMUM_INSTRUCTION_SIZE).unwrap();
        if output.spare_capacity_mut().len() < count {
            Self::reserve_impl(output, count);
            if output.spare_capacity_mut().len() < count {
                // SAFETY: `reserve` made sure that we have this much capacity, so this is safe.
                unsafe {
                    core::hint::unreachable_unchecked();
                }
            }
        }
    }

    /// Builds a buffer holding the bytes of `array`.
    ///
    /// # Panics
    ///
    /// Panics if `N` is greater than `MAXIMUM_INSTRUCTION_SIZE`.
    #[inline]
    pub fn from_array<const N: usize>(array: [u8; N]) -> Self {
        assert!(
            N <= MAXIMUM_INSTRUCTION_SIZE,
            "array is longer than the maximum instruction size"
        );

        let mut out = Self::new();
        for value in array {
            out.append(value);
        }
        out
    }

    /// Returns the buffered bytes as a freshly allocated vector.
    pub fn to_vec(self) -> Vec<u8> {
        let mut vec = Vec::with_capacity(MAXIMUM_INSTRUCTION_SIZE);

        // SAFETY: We've reserved space for at least one instruction.
        unsafe {
            self.encode_into_vec_unsafe(&mut vec);
        }

        vec
    }
}
412
#[test]
fn test_inst_buf() {
    // Checks that the given bytes survive a trip through `InstBuf::from_array` and
    // `InstBuf::to_vec` unchanged.
    macro_rules! assert_round_trip {
        ($($byte:expr),+ $(,)?) => {
            assert_eq!(InstBuf::from_array([$($byte),+]).to_vec(), [$($byte),+]);
        };
    }

    assert_round_trip!(0x01);
    assert_round_trip!(0x01, 0x02);
    assert_round_trip!(0x01, 0x02, 0x03);
    assert_round_trip!(0x01, 0x02, 0x03, 0x04);
    // Exactly fills the first 64-bit word.
    assert_round_trip!(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08);
    // Spills over into the second 64-bit word.
    assert_round_trip!(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09);
    assert_round_trip!(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A);
}
431}