polkavm_common/
writer.rs

1use crate::program::{
2    self, Instruction, InstructionSet, InstructionSetKind, ProgramCounter, ProgramSymbol, BLOB_LEN_OFFSET, BLOB_LEN_SIZE,
3};
4use alloc::boxed::Box;
5use alloc::string::String;
6use alloc::vec::Vec;
7use core::ops::Range;
8
9#[derive(Copy, Clone, Default)]
10struct InstructionBuffer {
11    bytes: [u8; program::MAX_INSTRUCTION_LENGTH],
12    length: u8,
13}
14
15impl InstructionBuffer {
16    fn len(&self) -> usize {
17        self.length as usize
18    }
19
20    fn new(isa: InstructionSetKind, position: u32, minimum_size: u8, instruction: Instruction) -> Self {
21        let mut buffer = Self {
22            bytes: [0; program::MAX_INSTRUCTION_LENGTH],
23            length: 0,
24        };
25
26        let minimum_size = minimum_size as usize;
27        let mut length = instruction.serialize_into(isa, position, &mut buffer.bytes);
28        if length < minimum_size {
29            let Instruction::jump(target) = instruction else {
30                // We currently only need this for jumps.
31                unreachable!();
32            };
33            assert!(minimum_size >= 1 && minimum_size <= 5);
34
35            buffer.bytes[1..minimum_size].copy_from_slice(&u32::to_le_bytes(target.wrapping_sub(position))[..minimum_size - 1]);
36            length = minimum_size;
37        }
38
39        buffer.length = length as u8;
40        buffer
41    }
42}
43
44impl core::ops::Deref for InstructionBuffer {
45    type Target = [u8];
46    fn deref(&self) -> &Self::Target {
47        &self.bytes[..self.length as usize]
48    }
49}
50
51impl Instruction {
52    fn target_mut(&mut self) -> Option<&mut u32> {
53        match self {
54            Instruction::jump(ref mut target)
55            | Instruction::load_imm_and_jump(_, _, ref mut target)
56            | Instruction::branch_eq_imm(_, _, ref mut target)
57            | Instruction::branch_not_eq_imm(_, _, ref mut target)
58            | Instruction::branch_less_unsigned_imm(_, _, ref mut target)
59            | Instruction::branch_less_signed_imm(_, _, ref mut target)
60            | Instruction::branch_greater_or_equal_unsigned_imm(_, _, ref mut target)
61            | Instruction::branch_greater_or_equal_signed_imm(_, _, ref mut target)
62            | Instruction::branch_less_or_equal_signed_imm(_, _, ref mut target)
63            | Instruction::branch_less_or_equal_unsigned_imm(_, _, ref mut target)
64            | Instruction::branch_greater_signed_imm(_, _, ref mut target)
65            | Instruction::branch_greater_unsigned_imm(_, _, ref mut target)
66            | Instruction::branch_eq(_, _, ref mut target)
67            | Instruction::branch_not_eq(_, _, ref mut target)
68            | Instruction::branch_less_unsigned(_, _, ref mut target)
69            | Instruction::branch_less_signed(_, _, ref mut target)
70            | Instruction::branch_greater_or_equal_unsigned(_, _, ref mut target)
71            | Instruction::branch_greater_or_equal_signed(_, _, ref mut target) => Some(target),
72            _ => None,
73        }
74    }
75}
76
77#[derive(Copy, Clone)]
78struct SerializedInstruction {
79    instruction: Instruction,
80    bytes: InstructionBuffer,
81    target_nth_instruction: Option<usize>,
82    position: u32,
83    minimum_size: u8,
84}
85
86#[derive(Clone)]
87pub struct ProgramBlobBuilder {
88    isa: InstructionSetKind,
89    ro_data_size: u32,
90    rw_data_size: u32,
91    stack_size: u32,
92    ro_data: Vec<u8>,
93    rw_data: Vec<u8>,
94    imports: Vec<ProgramSymbol<Box<[u8]>>>,
95    exports: Vec<(Export, ProgramSymbol<Box<[u8]>>)>,
96    code: Vec<Instruction>,
97    jump_table: Vec<u32>,
98    custom: Vec<(u8, Vec<u8>)>,
99    dispatch_table: Vec<Vec<u8>>,
100    ignore_instruction_set_incompatibility: bool,
101}
102
103struct SerializedCode {
104    jump_table: Vec<u8>,
105    jump_table_entry_count: u32,
106    jump_table_entry_size: u8,
107    code: Vec<u8>,
108    bitmask: Vec<u8>,
109    exports: Vec<(u32, Vec<u8>)>,
110}
111
112#[derive(Copy, Clone)]
113enum Export {
114    ByBlock(u32),
115    ByInstruction(u32),
116}
117
118impl ProgramBlobBuilder {
119    pub fn new(isa: InstructionSetKind) -> Self {
120        Self {
121            isa,
122            ro_data_size: Default::default(),
123            rw_data_size: Default::default(),
124            stack_size: Default::default(),
125            ro_data: Default::default(),
126            rw_data: Default::default(),
127            imports: Default::default(),
128            exports: Default::default(),
129            code: Default::default(),
130            jump_table: Default::default(),
131            custom: Default::default(),
132            dispatch_table: Default::default(),
133            ignore_instruction_set_incompatibility: false,
134        }
135    }
136
137    pub fn set_ignore_instruction_set_incompatibility(&mut self, value: bool) {
138        self.ignore_instruction_set_incompatibility = value;
139    }
140
141    pub fn set_ro_data_size(&mut self, size: u32) {
142        self.ro_data_size = size;
143    }
144
145    pub fn set_rw_data_size(&mut self, size: u32) {
146        self.rw_data_size = size;
147    }
148
149    pub fn set_stack_size(&mut self, size: u32) {
150        self.stack_size = size;
151    }
152
153    pub fn set_ro_data(&mut self, data: Vec<u8>) {
154        self.ro_data = data;
155    }
156
157    pub fn set_rw_data(&mut self, data: Vec<u8>) {
158        self.rw_data = data;
159    }
160
161    pub fn add_import(&mut self, import: &[u8]) {
162        self.imports.push(ProgramSymbol::new(import.into()));
163    }
164
165    pub fn add_export_by_basic_block(&mut self, target_basic_block: u32, symbol: &[u8]) {
166        self.exports
167            .push((Export::ByBlock(target_basic_block), ProgramSymbol::new(symbol.into())));
168    }
169
170    pub fn add_export_by_instruction(&mut self, target_instruction: u32, symbol: &[u8]) {
171        self.exports
172            .push((Export::ByInstruction(target_instruction), ProgramSymbol::new(symbol.into())));
173    }
174
175    pub fn add_dispatch_table_entry(&mut self, symbol: impl Into<Vec<u8>>) {
176        self.dispatch_table.push(symbol.into());
177    }
178
179    pub fn set_code(&mut self, code: &[Instruction], jump_table: &[u32]) {
180        self.code = code.to_vec();
181        self.jump_table = jump_table.to_vec();
182    }
183
184    fn serialize_code(&self) -> Result<SerializedCode, String> {
185        fn mutate<T>(slot: &mut T, value: T) -> bool
186        where
187            T: PartialEq,
188        {
189            if *slot == value {
190                false
191            } else {
192                *slot = value;
193                true
194            }
195        }
196
197        // We will need to shift all of the basic block indexes by how many entries are in our injected dispatch table.
198        let basic_block_shift = self.dispatch_table.len() as u32;
199
200        let mut instructions = Vec::with_capacity(self.dispatch_table.len() + self.code.len());
201        for (nth, symbol) in self.dispatch_table.iter().enumerate() {
202            let Some(&(target, _)) = self.exports.iter().find(|(_, export_symbol)| symbol == export_symbol.as_bytes()) else {
203                return Err(alloc::format!(
204                    "failed to build a dispatch table: symbol not found: {}",
205                    ProgramSymbol::new(symbol)
206                ));
207            };
208
209            let minimum_size = if nth + 1 == self.dispatch_table.len() {
210                // The last entry doesn't have to be padded.
211                0
212            } else {
213                5
214            };
215
216            let target_basic_block = match target {
217                Export::ByBlock(target) => target,
218                Export::ByInstruction(..) => {
219                    return Err(alloc::format!(
220                        "failed to build a dispatch table: points to a symbol which is not basic block based: {}",
221                        ProgramSymbol::new(symbol)
222                    ));
223                }
224            };
225
226            instructions.push(SerializedInstruction {
227                instruction: Instruction::jump(target_basic_block + basic_block_shift),
228                bytes: InstructionBuffer::default(),
229                target_nth_instruction: None,
230                position: 0,
231                minimum_size,
232            });
233        }
234
235        for instruction in &self.code {
236            let mut instruction = *instruction;
237            if let Some(target_basic_block) = instruction.target_mut() {
238                *target_basic_block += basic_block_shift;
239            }
240
241            if !self.ignore_instruction_set_incompatibility {
242                let opcode = instruction.opcode();
243                if !self.isa.supports_opcode(opcode) {
244                    return Err(alloc::format!(
245                        "failed to build a program: the instruction '{}' is not available in the '{}' instruction set",
246                        opcode.name(),
247                        self.isa.name(),
248                    ));
249                }
250            }
251
252            instructions.push(SerializedInstruction {
253                instruction,
254                bytes: InstructionBuffer::default(),
255                target_nth_instruction: None,
256                position: 0,
257                minimum_size: 0,
258            });
259        }
260
261        let mut basic_block_to_instruction_index = Vec::with_capacity(self.code.len());
262        basic_block_to_instruction_index.push(0);
263
264        for (nth_instruction, entry) in instructions.iter().enumerate() {
265            if entry.instruction.opcode().starts_new_basic_block() {
266                basic_block_to_instruction_index.push(nth_instruction + 1);
267            }
268        }
269
270        let mut position: u32 = 0;
271        for (nth_instruction, entry) in instructions.iter_mut().enumerate() {
272            entry.target_nth_instruction = entry.instruction.target_mut().map(|target| {
273                let target_nth_instruction = basic_block_to_instruction_index[*target as usize];
274                // Here we change the target from a basic block index into a byte offset.
275                // This is completely inaccurate, but that's fine. This is just a guess, and we'll correct it in the next loop.
276                *target = position.wrapping_add((target_nth_instruction as i32 - nth_instruction as i32) as u32);
277                target_nth_instruction
278            });
279
280            entry.position = position;
281            entry.bytes = InstructionBuffer::new(self.isa, position, entry.minimum_size, entry.instruction);
282            position = position.checked_add(entry.bytes.len() as u32).expect("too many instructions");
283        }
284
285        // Adjust offsets to other instructions until we reach a steady state.
286        loop {
287            let mut any_modified = false;
288            position = 0;
289            for nth_instruction in 0..instructions.len() {
290                let mut self_modified = mutate(&mut instructions[nth_instruction].position, position);
291                if let Some(target_nth_instruction) = instructions[nth_instruction].target_nth_instruction {
292                    let new_target = instructions[target_nth_instruction].position;
293                    let old_target = instructions[nth_instruction].instruction.target_mut().unwrap();
294                    self_modified |= mutate(old_target, new_target);
295
296                    if self_modified {
297                        instructions[nth_instruction].bytes = InstructionBuffer::new(
298                            self.isa,
299                            position,
300                            instructions[nth_instruction].minimum_size,
301                            instructions[nth_instruction].instruction,
302                        );
303                    }
304                }
305
306                position = position
307                    .checked_add(instructions[nth_instruction].bytes.len() as u32)
308                    .expect("too many instructions");
309
310                any_modified |= self_modified;
311            }
312
313            if !any_modified {
314                break;
315            }
316        }
317
318        let mut jump_table_entry_size = 0;
319        let mut jump_table_entries = Vec::with_capacity(self.jump_table.len());
320        for &target in &self.jump_table {
321            let target = target + basic_block_shift;
322            let target_nth_instruction = basic_block_to_instruction_index[target as usize];
323            let offset = instructions[target_nth_instruction].position.to_le_bytes();
324            jump_table_entries.push(offset);
325            jump_table_entry_size = core::cmp::max(jump_table_entry_size, offset.iter().take_while(|&&b| b != 0).count());
326        }
327
328        let mut output = SerializedCode {
329            jump_table_entry_count: jump_table_entries.len() as u32,
330            jump_table_entry_size: jump_table_entry_size as u8,
331            jump_table: Vec::with_capacity(jump_table_entry_size * jump_table_entries.len()),
332            code: Vec::with_capacity(instructions.iter().map(|entry| entry.bytes.len()).sum()),
333            bitmask: Vec::new(),
334            exports: Vec::with_capacity(self.exports.len()),
335        };
336
337        for target in jump_table_entries {
338            output.jump_table.extend_from_slice(&target[..jump_table_entry_size]);
339        }
340
341        struct BitVec {
342            bytes: Vec<u8>,
343            current: usize,
344            bits: usize,
345        }
346
347        impl BitVec {
348            fn with_capacity(capacity: usize) -> Self {
349                BitVec {
350                    bytes: Vec::with_capacity(capacity),
351                    current: 0,
352                    bits: 0,
353                }
354            }
355
356            fn push(&mut self, value: bool) {
357                self.current |= usize::from(value) << self.bits;
358                self.bits += 1;
359                if self.bits == 8 {
360                    self.bytes.push(self.current as u8);
361                    self.current = 0;
362                    self.bits = 0;
363                }
364            }
365
366            fn finish(mut self) -> Vec<u8> {
367                while self.bits > 0 {
368                    self.push(false);
369                }
370                self.bytes
371            }
372        }
373
374        let mut bitmask = BitVec::with_capacity(output.code.capacity() / 8 + 1);
375        for entry in &instructions {
376            bitmask.push(true);
377            for _ in 1..entry.bytes.len() {
378                bitmask.push(false);
379            }
380
381            output.code.extend_from_slice(&entry.bytes);
382        }
383
384        output.bitmask = bitmask.finish();
385
386        for (target, symbol) in &self.exports {
387            let nth_instruction = match target {
388                Export::ByBlock(target_basic_block) => {
389                    let target_basic_block = *target_basic_block as usize + basic_block_shift as usize;
390                    basic_block_to_instruction_index[target_basic_block]
391                }
392                Export::ByInstruction(nth_instruction) => *nth_instruction as usize,
393            };
394
395            let offset = instructions[nth_instruction].position;
396            output.exports.push((offset, symbol.as_bytes().to_vec()));
397        }
398
399        if cfg!(debug_assertions) {
400            // Sanity check.
401            let mut parsed = Vec::new();
402            let mut offsets = alloc::collections::BTreeSet::new();
403
404            let parsed_instructions: Vec<_> =
405                crate::program::Instructions::new_unbounded(self.isa, &output.code, &output.bitmask, 0).collect();
406            for instruction in parsed_instructions {
407                if instruction.offset.0 as usize == output.code.len() {
408                    // Implicit trap.
409                    debug_assert!(matches!(instruction.kind, Instruction::invalid));
410                    break;
411                }
412                parsed.push(instruction);
413                offsets.insert(instruction.offset);
414            }
415
416            assert_eq!(parsed.len(), instructions.len());
417            for (nth_instruction, (mut parsed, entry)) in parsed.into_iter().zip(instructions.into_iter()).enumerate() {
418                let parsed_length = parsed.next_offset.0 - parsed.offset.0;
419                let opcode_mismatch = parsed.kind != entry.instruction && !self.ignore_instruction_set_incompatibility;
420                if opcode_mismatch || entry.position != parsed.offset.0 || u32::from(entry.bytes.length) != parsed_length {
421                    panic!(
422                        concat!(
423                            "Broken serialization for instruction #{}:\n",
424                            "  Serialized:\n",
425                            "    Instruction: {:?}\n",
426                            "    Offset:      {}\n",
427                            "    Length:      {}\n",
428                            "    Bytes:       {:?}\n",
429                            "  Deserialized:\n",
430                            "    Instruction: {:?}\n",
431                            "    Offset:      {}\n",
432                            "    Length:      {}\n",
433                            "    Bytes:       {:?}\n",
434                        ),
435                        nth_instruction,
436                        entry.instruction,
437                        entry.position,
438                        entry.bytes.len(),
439                        &entry.bytes.bytes[..entry.bytes.length as usize],
440                        parsed.kind,
441                        parsed.offset.0,
442                        parsed_length,
443                        &output.code[parsed.offset.0 as usize..parsed.offset.0 as usize + parsed_length as usize],
444                    );
445                }
446
447                if let Some(target) = parsed.kind.target_mut() {
448                    assert!(offsets.contains(&ProgramCounter(*target)));
449                }
450            }
451        }
452
453        Ok(output)
454    }
455
456    pub fn add_custom_section(&mut self, section: u8, contents: Vec<u8>) {
457        self.custom.push((section, contents));
458    }
459
460    pub fn into_vec(self) -> Result<Vec<u8>, String> {
461        self.to_vec()
462    }
463
464    pub fn to_vec(&self) -> Result<Vec<u8>, String> {
465        let code = self.serialize_code()?;
466        let mut output = Vec::new();
467        let mut writer = Writer::new(&mut output);
468
469        writer.push_raw_bytes(&program::BLOB_MAGIC);
470        writer.push_byte(self.isa.blob_version());
471        writer.push_raw_bytes(&[0; BLOB_LEN_SIZE]);
472
473        if self.ro_data_size > 0 || self.rw_data_size > 0 || self.stack_size > 0 {
474            writer.push_section_inplace(program::SECTION_MEMORY_CONFIG, |writer| {
475                writer.push_varint(self.ro_data_size);
476                writer.push_varint(self.rw_data_size);
477                writer.push_varint(self.stack_size);
478            });
479        }
480
481        writer.push_section(program::SECTION_RO_DATA, &self.ro_data);
482        writer.push_section(program::SECTION_RW_DATA, &self.rw_data);
483        if !self.imports.is_empty() {
484            writer.push_section_inplace(program::SECTION_IMPORTS, |writer| {
485                let mut offsets_blob = Vec::new();
486                let mut symbols_blob = Vec::new();
487                for symbol in &self.imports {
488                    offsets_blob.extend_from_slice(&(symbols_blob.len() as u32).to_le_bytes());
489                    symbols_blob.extend_from_slice(symbol.as_bytes())
490                }
491
492                writer.push_varint(self.imports.len().try_into().expect("too many imports"));
493                writer.push_raw_bytes(&offsets_blob);
494                writer.push_raw_bytes(&symbols_blob);
495            });
496        }
497
498        if !code.exports.is_empty() {
499            writer.push_section_inplace(program::SECTION_EXPORTS, |writer| {
500                writer.push_varint(code.exports.len().try_into().expect("too many exports"));
501                for (offset, symbol) in code.exports {
502                    writer.push_varint(offset);
503                    writer.push_bytes_with_length(&symbol);
504                }
505            });
506        }
507
508        writer.push_section_inplace(program::SECTION_CODE_AND_JUMP_TABLE, |writer| {
509            writer.push_varint(code.jump_table_entry_count);
510            writer.push_byte(code.jump_table_entry_size);
511            writer.push_varint(code.code.len() as u32);
512            writer.push_raw_bytes(&code.jump_table);
513            writer.push_raw_bytes(&code.code);
514            writer.push_raw_bytes(&code.bitmask);
515        });
516
517        for (section, contents) in &self.custom {
518            writer.push_section(*section, contents);
519        }
520
521        writer.push_raw_bytes(&[program::SECTION_END_OF_FILE]);
522
523        let blob_len = (writer.len() as u64).to_le_bytes();
524        output[BLOB_LEN_OFFSET..BLOB_LEN_OFFSET + BLOB_LEN_SIZE].copy_from_slice(&blob_len);
525
526        Ok(output)
527    }
528}
529
530pub struct Writer<'a> {
531    buffer: &'a mut Vec<u8>,
532}
533
534impl<'a> Writer<'a> {
535    pub fn new(buffer: &'a mut Vec<u8>) -> Self {
536        Self { buffer }
537    }
538
539    fn push_section_inplace(&mut self, section: u8, callback: impl FnOnce(&mut Self)) -> Range<usize> {
540        let section_position = self.buffer.len();
541        self.buffer.push(section);
542
543        // Reserve the space for the length varint.
544        let length_position = self.buffer.len();
545        self.push_raw_bytes(&[0xff_u8; crate::varint::MAX_VARINT_LENGTH]);
546
547        let payload_position = self.buffer.len();
548        callback(self);
549
550        let payload_length: u32 = (self.buffer.len() - payload_position).try_into().expect("section size overflow");
551        if payload_length == 0 {
552            // Nothing was written by the callback. Skip writing the section.
553            self.buffer.truncate(section_position);
554            return 0..0;
555        }
556
557        // Write the length varint.
558        let length_length = crate::varint::write_varint(payload_length, &mut self.buffer[length_position..]);
559
560        // Drain any excess length varint bytes.
561        self.buffer
562            .drain(length_position + length_length..length_position + crate::varint::MAX_VARINT_LENGTH);
563
564        length_position + length_length..self.buffer.len()
565    }
566
567    fn push_section(&mut self, section: u8, contents: &[u8]) {
568        if contents.is_empty() {
569            return;
570        }
571
572        self.push_byte(section);
573        self.push_varint(contents.len().try_into().expect("section size overflow"));
574        self.push_raw_bytes(contents);
575    }
576
577    pub fn push_raw_bytes(&mut self, slice: &[u8]) {
578        self.buffer.extend_from_slice(slice);
579    }
580
581    pub fn push_byte(&mut self, byte: u8) {
582        self.buffer.push(byte);
583    }
584
585    pub fn push_u32(&mut self, value: u32) {
586        self.push_raw_bytes(&value.to_le_bytes());
587    }
588
589    pub fn push_varint(&mut self, value: u32) {
590        let mut buffer = [0xff_u8; crate::varint::MAX_VARINT_LENGTH];
591        let length = crate::varint::write_varint(value, &mut buffer);
592        self.push_raw_bytes(&buffer[..length]);
593    }
594
595    pub fn push_bytes_with_length(&mut self, slice: &[u8]) {
596        self.push_varint(slice.len().try_into().expect("length overflow"));
597        self.push_raw_bytes(slice);
598    }
599
600    pub fn len(&self) -> usize {
601        self.buffer.len()
602    }
603
604    pub fn is_empty(&self) -> bool {
605        self.buffer.is_empty()
606    }
607}