1use crate::program::{
2 self, Instruction, InstructionSet, InstructionSetKind, ProgramCounter, ProgramSymbol, BLOB_LEN_OFFSET, BLOB_LEN_SIZE,
3};
4use alloc::boxed::Box;
5use alloc::string::String;
6use alloc::vec::Vec;
7use core::ops::Range;
8
9#[derive(Copy, Clone, Default)]
10struct InstructionBuffer {
11 bytes: [u8; program::MAX_INSTRUCTION_LENGTH],
12 length: u8,
13}
14
15impl InstructionBuffer {
16 fn len(&self) -> usize {
17 self.length as usize
18 }
19
20 fn new(isa: InstructionSetKind, position: u32, minimum_size: u8, instruction: Instruction) -> Self {
21 let mut buffer = Self {
22 bytes: [0; program::MAX_INSTRUCTION_LENGTH],
23 length: 0,
24 };
25
26 let minimum_size = minimum_size as usize;
27 let mut length = instruction.serialize_into(isa, position, &mut buffer.bytes);
28 if length < minimum_size {
29 let Instruction::jump(target) = instruction else {
30 unreachable!();
32 };
33 assert!(minimum_size >= 1 && minimum_size <= 5);
34
35 buffer.bytes[1..minimum_size].copy_from_slice(&u32::to_le_bytes(target.wrapping_sub(position))[..minimum_size - 1]);
36 length = minimum_size;
37 }
38
39 buffer.length = length as u8;
40 buffer
41 }
42}
43
44impl core::ops::Deref for InstructionBuffer {
45 type Target = [u8];
46 fn deref(&self) -> &Self::Target {
47 &self.bytes[..self.length as usize]
48 }
49}
50
51impl Instruction {
52 fn target_mut(&mut self) -> Option<&mut u32> {
53 match self {
54 Instruction::jump(ref mut target)
55 | Instruction::load_imm_and_jump(_, _, ref mut target)
56 | Instruction::branch_eq_imm(_, _, ref mut target)
57 | Instruction::branch_not_eq_imm(_, _, ref mut target)
58 | Instruction::branch_less_unsigned_imm(_, _, ref mut target)
59 | Instruction::branch_less_signed_imm(_, _, ref mut target)
60 | Instruction::branch_greater_or_equal_unsigned_imm(_, _, ref mut target)
61 | Instruction::branch_greater_or_equal_signed_imm(_, _, ref mut target)
62 | Instruction::branch_less_or_equal_signed_imm(_, _, ref mut target)
63 | Instruction::branch_less_or_equal_unsigned_imm(_, _, ref mut target)
64 | Instruction::branch_greater_signed_imm(_, _, ref mut target)
65 | Instruction::branch_greater_unsigned_imm(_, _, ref mut target)
66 | Instruction::branch_eq(_, _, ref mut target)
67 | Instruction::branch_not_eq(_, _, ref mut target)
68 | Instruction::branch_less_unsigned(_, _, ref mut target)
69 | Instruction::branch_less_signed(_, _, ref mut target)
70 | Instruction::branch_greater_or_equal_unsigned(_, _, ref mut target)
71 | Instruction::branch_greater_or_equal_signed(_, _, ref mut target) => Some(target),
72 _ => None,
73 }
74 }
75}
76
77#[derive(Copy, Clone)]
78struct SerializedInstruction {
79 instruction: Instruction,
80 bytes: InstructionBuffer,
81 target_nth_instruction: Option<usize>,
82 position: u32,
83 minimum_size: u8,
84}
85
86#[derive(Clone)]
87pub struct ProgramBlobBuilder {
88 isa: InstructionSetKind,
89 ro_data_size: u32,
90 rw_data_size: u32,
91 stack_size: u32,
92 ro_data: Vec<u8>,
93 rw_data: Vec<u8>,
94 imports: Vec<ProgramSymbol<Box<[u8]>>>,
95 exports: Vec<(Export, ProgramSymbol<Box<[u8]>>)>,
96 code: Vec<Instruction>,
97 jump_table: Vec<u32>,
98 custom: Vec<(u8, Vec<u8>)>,
99 dispatch_table: Vec<Vec<u8>>,
100 ignore_instruction_set_incompatibility: bool,
101}
102
103struct SerializedCode {
104 jump_table: Vec<u8>,
105 jump_table_entry_count: u32,
106 jump_table_entry_size: u8,
107 code: Vec<u8>,
108 bitmask: Vec<u8>,
109 exports: Vec<(u32, Vec<u8>)>,
110}
111
112#[derive(Copy, Clone)]
113enum Export {
114 ByBlock(u32),
115 ByInstruction(u32),
116}
117
118impl ProgramBlobBuilder {
119 pub fn new(isa: InstructionSetKind) -> Self {
120 Self {
121 isa,
122 ro_data_size: Default::default(),
123 rw_data_size: Default::default(),
124 stack_size: Default::default(),
125 ro_data: Default::default(),
126 rw_data: Default::default(),
127 imports: Default::default(),
128 exports: Default::default(),
129 code: Default::default(),
130 jump_table: Default::default(),
131 custom: Default::default(),
132 dispatch_table: Default::default(),
133 ignore_instruction_set_incompatibility: false,
134 }
135 }
136
137 pub fn set_ignore_instruction_set_incompatibility(&mut self, value: bool) {
138 self.ignore_instruction_set_incompatibility = value;
139 }
140
141 pub fn set_ro_data_size(&mut self, size: u32) {
142 self.ro_data_size = size;
143 }
144
145 pub fn set_rw_data_size(&mut self, size: u32) {
146 self.rw_data_size = size;
147 }
148
149 pub fn set_stack_size(&mut self, size: u32) {
150 self.stack_size = size;
151 }
152
153 pub fn set_ro_data(&mut self, data: Vec<u8>) {
154 self.ro_data = data;
155 }
156
157 pub fn set_rw_data(&mut self, data: Vec<u8>) {
158 self.rw_data = data;
159 }
160
161 pub fn add_import(&mut self, import: &[u8]) {
162 self.imports.push(ProgramSymbol::new(import.into()));
163 }
164
165 pub fn add_export_by_basic_block(&mut self, target_basic_block: u32, symbol: &[u8]) {
166 self.exports
167 .push((Export::ByBlock(target_basic_block), ProgramSymbol::new(symbol.into())));
168 }
169
170 pub fn add_export_by_instruction(&mut self, target_instruction: u32, symbol: &[u8]) {
171 self.exports
172 .push((Export::ByInstruction(target_instruction), ProgramSymbol::new(symbol.into())));
173 }
174
175 pub fn add_dispatch_table_entry(&mut self, symbol: impl Into<Vec<u8>>) {
176 self.dispatch_table.push(symbol.into());
177 }
178
179 pub fn set_code(&mut self, code: &[Instruction], jump_table: &[u32]) {
180 self.code = code.to_vec();
181 self.jump_table = jump_table.to_vec();
182 }
183
184 fn serialize_code(&self) -> Result<SerializedCode, String> {
185 fn mutate<T>(slot: &mut T, value: T) -> bool
186 where
187 T: PartialEq,
188 {
189 if *slot == value {
190 false
191 } else {
192 *slot = value;
193 true
194 }
195 }
196
197 let basic_block_shift = self.dispatch_table.len() as u32;
199
200 let mut instructions = Vec::with_capacity(self.dispatch_table.len() + self.code.len());
201 for (nth, symbol) in self.dispatch_table.iter().enumerate() {
202 let Some(&(target, _)) = self.exports.iter().find(|(_, export_symbol)| symbol == export_symbol.as_bytes()) else {
203 return Err(alloc::format!(
204 "failed to build a dispatch table: symbol not found: {}",
205 ProgramSymbol::new(symbol)
206 ));
207 };
208
209 let minimum_size = if nth + 1 == self.dispatch_table.len() {
210 0
212 } else {
213 5
214 };
215
216 let target_basic_block = match target {
217 Export::ByBlock(target) => target,
218 Export::ByInstruction(..) => {
219 return Err(alloc::format!(
220 "failed to build a dispatch table: points to a symbol which is not basic block based: {}",
221 ProgramSymbol::new(symbol)
222 ));
223 }
224 };
225
226 instructions.push(SerializedInstruction {
227 instruction: Instruction::jump(target_basic_block + basic_block_shift),
228 bytes: InstructionBuffer::default(),
229 target_nth_instruction: None,
230 position: 0,
231 minimum_size,
232 });
233 }
234
235 for instruction in &self.code {
236 let mut instruction = *instruction;
237 if let Some(target_basic_block) = instruction.target_mut() {
238 *target_basic_block += basic_block_shift;
239 }
240
241 if !self.ignore_instruction_set_incompatibility {
242 let opcode = instruction.opcode();
243 if !self.isa.supports_opcode(opcode) {
244 return Err(alloc::format!(
245 "failed to build a program: the instruction '{}' is not available in the '{}' instruction set",
246 opcode.name(),
247 self.isa.name(),
248 ));
249 }
250 }
251
252 instructions.push(SerializedInstruction {
253 instruction,
254 bytes: InstructionBuffer::default(),
255 target_nth_instruction: None,
256 position: 0,
257 minimum_size: 0,
258 });
259 }
260
261 let mut basic_block_to_instruction_index = Vec::with_capacity(self.code.len());
262 basic_block_to_instruction_index.push(0);
263
264 for (nth_instruction, entry) in instructions.iter().enumerate() {
265 if entry.instruction.opcode().starts_new_basic_block() {
266 basic_block_to_instruction_index.push(nth_instruction + 1);
267 }
268 }
269
270 let mut position: u32 = 0;
271 for (nth_instruction, entry) in instructions.iter_mut().enumerate() {
272 entry.target_nth_instruction = entry.instruction.target_mut().map(|target| {
273 let target_nth_instruction = basic_block_to_instruction_index[*target as usize];
274 *target = position.wrapping_add((target_nth_instruction as i32 - nth_instruction as i32) as u32);
277 target_nth_instruction
278 });
279
280 entry.position = position;
281 entry.bytes = InstructionBuffer::new(self.isa, position, entry.minimum_size, entry.instruction);
282 position = position.checked_add(entry.bytes.len() as u32).expect("too many instructions");
283 }
284
285 loop {
287 let mut any_modified = false;
288 position = 0;
289 for nth_instruction in 0..instructions.len() {
290 let mut self_modified = mutate(&mut instructions[nth_instruction].position, position);
291 if let Some(target_nth_instruction) = instructions[nth_instruction].target_nth_instruction {
292 let new_target = instructions[target_nth_instruction].position;
293 let old_target = instructions[nth_instruction].instruction.target_mut().unwrap();
294 self_modified |= mutate(old_target, new_target);
295
296 if self_modified {
297 instructions[nth_instruction].bytes = InstructionBuffer::new(
298 self.isa,
299 position,
300 instructions[nth_instruction].minimum_size,
301 instructions[nth_instruction].instruction,
302 );
303 }
304 }
305
306 position = position
307 .checked_add(instructions[nth_instruction].bytes.len() as u32)
308 .expect("too many instructions");
309
310 any_modified |= self_modified;
311 }
312
313 if !any_modified {
314 break;
315 }
316 }
317
318 let mut jump_table_entry_size = 0;
319 let mut jump_table_entries = Vec::with_capacity(self.jump_table.len());
320 for &target in &self.jump_table {
321 let target = target + basic_block_shift;
322 let target_nth_instruction = basic_block_to_instruction_index[target as usize];
323 let offset = instructions[target_nth_instruction].position.to_le_bytes();
324 jump_table_entries.push(offset);
325 jump_table_entry_size = core::cmp::max(jump_table_entry_size, offset.iter().take_while(|&&b| b != 0).count());
326 }
327
328 let mut output = SerializedCode {
329 jump_table_entry_count: jump_table_entries.len() as u32,
330 jump_table_entry_size: jump_table_entry_size as u8,
331 jump_table: Vec::with_capacity(jump_table_entry_size * jump_table_entries.len()),
332 code: Vec::with_capacity(instructions.iter().map(|entry| entry.bytes.len()).sum()),
333 bitmask: Vec::new(),
334 exports: Vec::with_capacity(self.exports.len()),
335 };
336
337 for target in jump_table_entries {
338 output.jump_table.extend_from_slice(&target[..jump_table_entry_size]);
339 }
340
341 struct BitVec {
342 bytes: Vec<u8>,
343 current: usize,
344 bits: usize,
345 }
346
347 impl BitVec {
348 fn with_capacity(capacity: usize) -> Self {
349 BitVec {
350 bytes: Vec::with_capacity(capacity),
351 current: 0,
352 bits: 0,
353 }
354 }
355
356 fn push(&mut self, value: bool) {
357 self.current |= usize::from(value) << self.bits;
358 self.bits += 1;
359 if self.bits == 8 {
360 self.bytes.push(self.current as u8);
361 self.current = 0;
362 self.bits = 0;
363 }
364 }
365
366 fn finish(mut self) -> Vec<u8> {
367 while self.bits > 0 {
368 self.push(false);
369 }
370 self.bytes
371 }
372 }
373
374 let mut bitmask = BitVec::with_capacity(output.code.capacity() / 8 + 1);
375 for entry in &instructions {
376 bitmask.push(true);
377 for _ in 1..entry.bytes.len() {
378 bitmask.push(false);
379 }
380
381 output.code.extend_from_slice(&entry.bytes);
382 }
383
384 output.bitmask = bitmask.finish();
385
386 for (target, symbol) in &self.exports {
387 let nth_instruction = match target {
388 Export::ByBlock(target_basic_block) => {
389 let target_basic_block = *target_basic_block as usize + basic_block_shift as usize;
390 basic_block_to_instruction_index[target_basic_block]
391 }
392 Export::ByInstruction(nth_instruction) => *nth_instruction as usize,
393 };
394
395 let offset = instructions[nth_instruction].position;
396 output.exports.push((offset, symbol.as_bytes().to_vec()));
397 }
398
399 if cfg!(debug_assertions) {
400 let mut parsed = Vec::new();
402 let mut offsets = alloc::collections::BTreeSet::new();
403
404 let parsed_instructions: Vec<_> =
405 crate::program::Instructions::new_unbounded(self.isa, &output.code, &output.bitmask, 0).collect();
406 for instruction in parsed_instructions {
407 if instruction.offset.0 as usize == output.code.len() {
408 debug_assert!(matches!(instruction.kind, Instruction::invalid));
410 break;
411 }
412 parsed.push(instruction);
413 offsets.insert(instruction.offset);
414 }
415
416 assert_eq!(parsed.len(), instructions.len());
417 for (nth_instruction, (mut parsed, entry)) in parsed.into_iter().zip(instructions.into_iter()).enumerate() {
418 let parsed_length = parsed.next_offset.0 - parsed.offset.0;
419 let opcode_mismatch = parsed.kind != entry.instruction && !self.ignore_instruction_set_incompatibility;
420 if opcode_mismatch || entry.position != parsed.offset.0 || u32::from(entry.bytes.length) != parsed_length {
421 panic!(
422 concat!(
423 "Broken serialization for instruction #{}:\n",
424 " Serialized:\n",
425 " Instruction: {:?}\n",
426 " Offset: {}\n",
427 " Length: {}\n",
428 " Bytes: {:?}\n",
429 " Deserialized:\n",
430 " Instruction: {:?}\n",
431 " Offset: {}\n",
432 " Length: {}\n",
433 " Bytes: {:?}\n",
434 ),
435 nth_instruction,
436 entry.instruction,
437 entry.position,
438 entry.bytes.len(),
439 &entry.bytes.bytes[..entry.bytes.length as usize],
440 parsed.kind,
441 parsed.offset.0,
442 parsed_length,
443 &output.code[parsed.offset.0 as usize..parsed.offset.0 as usize + parsed_length as usize],
444 );
445 }
446
447 if let Some(target) = parsed.kind.target_mut() {
448 assert!(offsets.contains(&ProgramCounter(*target)));
449 }
450 }
451 }
452
453 Ok(output)
454 }
455
456 pub fn add_custom_section(&mut self, section: u8, contents: Vec<u8>) {
457 self.custom.push((section, contents));
458 }
459
460 pub fn into_vec(self) -> Result<Vec<u8>, String> {
461 self.to_vec()
462 }
463
464 pub fn to_vec(&self) -> Result<Vec<u8>, String> {
465 let code = self.serialize_code()?;
466 let mut output = Vec::new();
467 let mut writer = Writer::new(&mut output);
468
469 writer.push_raw_bytes(&program::BLOB_MAGIC);
470 writer.push_byte(self.isa.blob_version());
471 writer.push_raw_bytes(&[0; BLOB_LEN_SIZE]);
472
473 if self.ro_data_size > 0 || self.rw_data_size > 0 || self.stack_size > 0 {
474 writer.push_section_inplace(program::SECTION_MEMORY_CONFIG, |writer| {
475 writer.push_varint(self.ro_data_size);
476 writer.push_varint(self.rw_data_size);
477 writer.push_varint(self.stack_size);
478 });
479 }
480
481 writer.push_section(program::SECTION_RO_DATA, &self.ro_data);
482 writer.push_section(program::SECTION_RW_DATA, &self.rw_data);
483 if !self.imports.is_empty() {
484 writer.push_section_inplace(program::SECTION_IMPORTS, |writer| {
485 let mut offsets_blob = Vec::new();
486 let mut symbols_blob = Vec::new();
487 for symbol in &self.imports {
488 offsets_blob.extend_from_slice(&(symbols_blob.len() as u32).to_le_bytes());
489 symbols_blob.extend_from_slice(symbol.as_bytes())
490 }
491
492 writer.push_varint(self.imports.len().try_into().expect("too many imports"));
493 writer.push_raw_bytes(&offsets_blob);
494 writer.push_raw_bytes(&symbols_blob);
495 });
496 }
497
498 if !code.exports.is_empty() {
499 writer.push_section_inplace(program::SECTION_EXPORTS, |writer| {
500 writer.push_varint(code.exports.len().try_into().expect("too many exports"));
501 for (offset, symbol) in code.exports {
502 writer.push_varint(offset);
503 writer.push_bytes_with_length(&symbol);
504 }
505 });
506 }
507
508 writer.push_section_inplace(program::SECTION_CODE_AND_JUMP_TABLE, |writer| {
509 writer.push_varint(code.jump_table_entry_count);
510 writer.push_byte(code.jump_table_entry_size);
511 writer.push_varint(code.code.len() as u32);
512 writer.push_raw_bytes(&code.jump_table);
513 writer.push_raw_bytes(&code.code);
514 writer.push_raw_bytes(&code.bitmask);
515 });
516
517 for (section, contents) in &self.custom {
518 writer.push_section(*section, contents);
519 }
520
521 writer.push_raw_bytes(&[program::SECTION_END_OF_FILE]);
522
523 let blob_len = (writer.len() as u64).to_le_bytes();
524 output[BLOB_LEN_OFFSET..BLOB_LEN_OFFSET + BLOB_LEN_SIZE].copy_from_slice(&blob_len);
525
526 Ok(output)
527 }
528}
529
530pub struct Writer<'a> {
531 buffer: &'a mut Vec<u8>,
532}
533
534impl<'a> Writer<'a> {
535 pub fn new(buffer: &'a mut Vec<u8>) -> Self {
536 Self { buffer }
537 }
538
539 fn push_section_inplace(&mut self, section: u8, callback: impl FnOnce(&mut Self)) -> Range<usize> {
540 let section_position = self.buffer.len();
541 self.buffer.push(section);
542
543 let length_position = self.buffer.len();
545 self.push_raw_bytes(&[0xff_u8; crate::varint::MAX_VARINT_LENGTH]);
546
547 let payload_position = self.buffer.len();
548 callback(self);
549
550 let payload_length: u32 = (self.buffer.len() - payload_position).try_into().expect("section size overflow");
551 if payload_length == 0 {
552 self.buffer.truncate(section_position);
554 return 0..0;
555 }
556
557 let length_length = crate::varint::write_varint(payload_length, &mut self.buffer[length_position..]);
559
560 self.buffer
562 .drain(length_position + length_length..length_position + crate::varint::MAX_VARINT_LENGTH);
563
564 length_position + length_length..self.buffer.len()
565 }
566
567 fn push_section(&mut self, section: u8, contents: &[u8]) {
568 if contents.is_empty() {
569 return;
570 }
571
572 self.push_byte(section);
573 self.push_varint(contents.len().try_into().expect("section size overflow"));
574 self.push_raw_bytes(contents);
575 }
576
577 pub fn push_raw_bytes(&mut self, slice: &[u8]) {
578 self.buffer.extend_from_slice(slice);
579 }
580
581 pub fn push_byte(&mut self, byte: u8) {
582 self.buffer.push(byte);
583 }
584
585 pub fn push_u32(&mut self, value: u32) {
586 self.push_raw_bytes(&value.to_le_bytes());
587 }
588
589 pub fn push_varint(&mut self, value: u32) {
590 let mut buffer = [0xff_u8; crate::varint::MAX_VARINT_LENGTH];
591 let length = crate::varint::write_varint(value, &mut buffer);
592 self.push_raw_bytes(&buffer[..length]);
593 }
594
595 pub fn push_bytes_with_length(&mut self, slice: &[u8]) {
596 self.push_varint(slice.len().try_into().expect("length overflow"));
597 self.push_raw_bytes(slice);
598 }
599
600 pub fn len(&self) -> usize {
601 self.buffer.len()
602 }
603
604 pub fn is_empty(&self) -> bool {
605 self.buffer.is_empty()
606 }
607}