polkavm/
compiler.rs

1use std::borrow::Cow;
2use std::collections::HashMap;
3
4use polkavm_assembler::{Assembler, Label};
5use polkavm_common::program::{ProgramExport, Instruction};
6use polkavm_common::zygote::{
7    AddressTable, VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH, VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH,
8};
9use polkavm_common::abi::VM_CODE_ADDRESS_ALIGNMENT;
10
11use crate::api::VisitorWrapper;
12use crate::error::{bail, Error};
13
14use crate::sandbox::{Sandbox, SandboxProgram, SandboxInit};
15use crate::config::{GasMeteringKind, ModuleConfig, SandboxKind};
16use crate::utils::GuestInit;
17
18#[cfg(target_arch = "x86_64")]
19mod amd64;
20
21pub(crate) struct Compiler<'a> {
22    asm: Assembler,
23    exports: &'a [ProgramExport<'a>],
24    basic_block_by_jump_table_index: &'a [u32],
25    jump_table_index_by_basic_block: &'a [u32],
26    nth_basic_block_to_label: Vec<Label>,
27    nth_basic_block_to_label_pending: Vec<Option<Label>>,
28    nth_basic_block_to_machine_code_offset: Vec<usize>,
29    pending_label_count: usize,
30    jump_table: Vec<u8>,
31    export_to_label: HashMap<u32, Label>,
32    export_trampolines: Vec<u64>,
33    debug_trace_execution: bool,
34    ecall_label: Label,
35    trap_label: Label,
36    trace_label: Label,
37    jump_table_label: Label,
38    sbrk_label: Label,
39    sandbox_kind: SandboxKind,
40    gas_metering: Option<GasMeteringKind>,
41    native_code_address: u64,
42    address_table: AddressTable,
43    vmctx_regs_offset: usize,
44    vmctx_gas_offset: usize,
45    vmctx_heap_info_offset: usize,
46    nth_instruction_to_code_offset_map: Vec<u32>,
47    init: GuestInit<'a>,
48    is_last_instruction: bool,
49}
50
51struct CompilationResult<'a> {
52    code: Vec<u8>,
53    jump_table: Vec<u8>,
54    export_trampolines: Vec<u64>,
55    sysreturn_address: u64,
56    nth_instruction_to_code_offset_map: Vec<u32>,
57    init: GuestInit<'a>,
58}
59
60impl<'a> Compiler<'a> {
61    #[allow(clippy::too_many_arguments)]
62    fn new(
63        config: &ModuleConfig,
64        exports: &'a [ProgramExport<'a>],
65        basic_block_by_jump_table_index: &'a [u32],
66        jump_table_index_by_basic_block: &'a [u32],
67        sandbox_kind: SandboxKind,
68        address_table: AddressTable,
69        vmctx_regs_offset: usize,
70        vmctx_gas_offset: usize,
71        vmctx_heap_info_offset: usize,
72        debug_trace_execution: bool,
73        native_code_address: u64,
74        instruction_count: usize,
75        basic_block_count: usize,
76        init: GuestInit<'a>,
77    ) -> Self {
78        let mut asm = Assembler::new();
79        let ecall_label = asm.forward_declare_label();
80        let trap_label = asm.forward_declare_label();
81        let trace_label = asm.forward_declare_label();
82        let jump_table_label = asm.forward_declare_label();
83        let sbrk_label = asm.forward_declare_label();
84
85        let nth_basic_block_to_label = Vec::with_capacity(basic_block_count);
86        let mut nth_basic_block_to_machine_code_offset = Vec::new();
87        if config.gas_metering.is_some() {
88            nth_basic_block_to_machine_code_offset.reserve(basic_block_count);
89        }
90
91        let mut nth_basic_block_to_label_pending = Vec::new();
92        nth_basic_block_to_label_pending.resize(basic_block_count, None);
93
94        let nth_instruction_to_code_offset_map: Vec<u32> = Vec::with_capacity(instruction_count + 1);
95        polkavm_common::static_assert!(polkavm_common::zygote::VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE < u32::MAX);
96
97        asm.reserve_code(instruction_count * 16);
98        asm.reserve_labels(basic_block_count * 2);
99        asm.reserve_fixups(basic_block_count);
100        asm.set_origin(native_code_address);
101
102        let mut compiler = Compiler {
103            asm,
104            exports,
105            basic_block_by_jump_table_index,
106            jump_table_index_by_basic_block,
107            nth_basic_block_to_label,
108            nth_basic_block_to_label_pending,
109            nth_basic_block_to_machine_code_offset,
110            pending_label_count: 0,
111            jump_table: Default::default(),
112            export_to_label: Default::default(),
113            export_trampolines: Default::default(),
114            ecall_label,
115            trap_label,
116            trace_label,
117            jump_table_label,
118            sbrk_label,
119            sandbox_kind,
120            gas_metering: config.gas_metering,
121            native_code_address,
122            debug_trace_execution,
123            address_table,
124            vmctx_regs_offset,
125            vmctx_gas_offset,
126            vmctx_heap_info_offset,
127            nth_instruction_to_code_offset_map,
128            init,
129            is_last_instruction: instruction_count == 0,
130        };
131
132        compiler.start_new_basic_block();
133        compiler
134    }
135
136    fn finalize(mut self, gas_cost_for_basic_block: &[u32]) -> Result<CompilationResult<'a>, Error> {
137        let epilogue_start = self.asm.len();
138        self.nth_instruction_to_code_offset_map.push(epilogue_start as u32);
139
140        if self.gas_metering.is_some() {
141            log::trace!("Finalizing block costs...");
142            assert_eq!(gas_cost_for_basic_block.len(), self.nth_basic_block_to_machine_code_offset.len());
143            let nth_basic_block_to_machine_code_offset = core::mem::take(&mut self.nth_basic_block_to_machine_code_offset);
144            for (offset, &cost) in nth_basic_block_to_machine_code_offset.into_iter().zip(gas_cost_for_basic_block.iter()) {
145                self.emit_weight(offset, cost);
146            }
147        }
148
149        log::trace!("Emitting trampolines");
150
151        if self.debug_trace_execution {
152            self.emit_trace_trampoline();
153        }
154
155        self.emit_trap_trampoline();
156        self.emit_ecall_trampoline();
157        self.emit_sbrk_trampoline();
158        self.emit_export_trampolines();
159
160        let label_sysreturn = self.emit_sysreturn();
161
162        if self.pending_label_count > 0 {
163            bail!("program is missing {} jump target(s)", self.pending_label_count);
164        }
165
166        let native_pointer_size = core::mem::size_of::<usize>();
167        let jump_table_entry_size = native_pointer_size * VM_CODE_ADDRESS_ALIGNMENT as usize;
168        self.jump_table.resize(self.basic_block_by_jump_table_index.len() * jump_table_entry_size, 0);
169
170        // The very first entry is always invalid.
171        assert_eq!(self.basic_block_by_jump_table_index[0], u32::MAX);
172
173        for (jump_table_index, nth_basic_block) in self.basic_block_by_jump_table_index.iter().copied().enumerate().skip(1) {
174            let label = self.nth_basic_block_to_label[nth_basic_block as usize];
175            let offset = jump_table_index * jump_table_entry_size;
176            let range = offset..offset + native_pointer_size;
177            let address = self.native_code_address
178                .checked_add_signed(self.asm.get_label_origin_offset_or_panic(label) as i64)
179                .expect("overflow");
180
181            log::trace!("Jump table: [0x{:x}] = 0x{:x}", self.native_code_address + range.start as u64, address);
182            self.jump_table[range].copy_from_slice(&address.to_ne_bytes());
183        }
184
185        self.export_trampolines.reserve(self.exports.len());
186        for export in self.exports {
187            let label = self.export_to_label.get(&export.jump_target()).unwrap();
188            let native_address = self.native_code_address
189                .checked_add_signed(self.asm.get_label_origin_offset_or_panic(*label) as i64)
190                .expect("overflow");
191            self.export_trampolines.push(native_address);
192        }
193
194        let epilogue_length = self.asm.len() - epilogue_start;
195        assert!(
196            epilogue_length <= VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH as usize,
197            "maximum epilogue length of {} exceeded with {} bytes",
198            VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH,
199            epilogue_length
200        );
201
202        let sysreturn_address = self.native_code_address
203            .checked_add_signed(self.asm.get_label_origin_offset_or_panic(label_sysreturn) as i64)
204            .expect("overflow");
205
206        match self.sandbox_kind {
207            SandboxKind::Linux => {},
208            SandboxKind::Generic => {
209                let native_page_size = crate::sandbox::get_native_page_size();
210                let padded_length = polkavm_common::utils::align_to_next_page_usize(native_page_size, self.asm.len()).unwrap();
211                self.asm.resize(padded_length, Self::PADDING_BYTE);
212                self.asm.define_label(self.jump_table_label);
213            }
214        }
215
216        let code = self.asm.finalize();
217        Ok(CompilationResult {
218            code: code.into(),
219            jump_table: self.jump_table,
220            export_trampolines: self.export_trampolines,
221            sysreturn_address,
222            nth_instruction_to_code_offset_map: self.nth_instruction_to_code_offset_map,
223            init: self.init,
224        })
225    }
226
227    #[inline(always)]
228    fn push<T>(&mut self, inst: polkavm_assembler::Instruction<T>) where T: core::fmt::Display {
229        self.asm.push(inst);
230    }
231
232    fn get_or_forward_declare_label(&mut self, nth_basic_block: u32) -> Label {
233        match self.nth_basic_block_to_label.get(nth_basic_block as usize) {
234            Some(label) => *label,
235            None => match self.nth_basic_block_to_label_pending[nth_basic_block as usize] {
236                Some(label) => label,
237                None => {
238                    let label = self.asm.forward_declare_label();
239                    if nth_basic_block as usize >= self.nth_basic_block_to_label_pending.len() {
240                        self.nth_basic_block_to_label_pending.resize(nth_basic_block as usize + 1, None);
241                    }
242                    self.nth_basic_block_to_label_pending[nth_basic_block as usize] = Some(label);
243                    self.pending_label_count += 1;
244                    label
245                }
246            },
247        }
248    }
249
250    fn define_label(&mut self, label: Label) {
251        log::trace!("Label: {}", label);
252        self.asm.define_label(label);
253    }
254
255    #[inline(always)]
256    fn next_basic_block(&self) -> u32 {
257        self.nth_basic_block_to_label.len() as u32
258    }
259
260    fn start_new_basic_block(&mut self) {
261        if self.is_last_instruction {
262            return;
263        }
264
265        let nth_basic_block = self.nth_basic_block_to_label.len();
266        log::trace!("Starting new basic block: @{nth_basic_block:x}");
267
268        let label = if let Some(label) = self.nth_basic_block_to_label_pending.get_mut(nth_basic_block).and_then(|value| value.take()) {
269            self.pending_label_count -= 1;
270            label
271        } else {
272            self.asm.forward_declare_label()
273        };
274
275        self.define_label(label);
276        self.nth_basic_block_to_label.push(label);
277
278        if let Some(gas_metering) = self.gas_metering {
279            let offset = self.asm.len();
280            self.nth_basic_block_to_machine_code_offset.push(offset);
281            self.emit_gas_metering_stub(gas_metering);
282        }
283    }
284}
285
286impl<'a> VisitorWrapper<'a, Compiler<'a>> {
287    fn current_instruction(&self) -> Instruction {
288        Instruction::deserialize(&self.common.code[self.common.current_instruction_offset..]).expect("failed to deserialize instruction").1
289    }
290
291    #[cold]
292    fn panic_on_too_long_instruction(&self, instruction_length: usize) -> ! {
293        panic!(
294            "maximum instruction length of {} exceeded with {} bytes for instruction: {}",
295            VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH,
296            instruction_length,
297            self.current_instruction(),
298        );
299    }
300
301    #[cold]
302    fn trace_compiled_instruction(&self) {
303        log::trace!("Compiling {}/{}: {}", self.common.nth_instruction + 1, self.common.instruction_count, self.current_instruction());
304    }
305}
306
307impl<'a> crate::api::BackendVisitor for VisitorWrapper<'a, Compiler<'a>> {
308    #[inline(always)]
309    fn before_instruction(&mut self) {
310        let initial_length = self.visitor.asm.len();
311        self.nth_instruction_to_code_offset_map.push(initial_length as u32);
312
313        if log::log_enabled!(log::Level::Trace) {
314            self.trace_compiled_instruction();
315        }
316
317        if self.debug_trace_execution {
318            self.visitor.trace_execution(self.common.nth_instruction);
319        }
320
321        self.is_last_instruction = self.common.is_last_instruction();
322        self.asm.reserve::<8>();
323    }
324
325    fn after_instruction(&mut self) {
326        if !self.debug_trace_execution {
327            let offset = *self.nth_instruction_to_code_offset_map.last().unwrap() as usize;
328            let instruction_length = self.asm.len() - offset;
329            if instruction_length > VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH as usize {
330                self.panic_on_too_long_instruction(instruction_length)
331            }
332        }
333    }
334}
335
336impl<S> crate::api::BackendModule for CompiledModule<S> where S: Sandbox {
337    type BackendVisitor<'a> = Compiler<'a>;
338    type Aux = S::AddressSpace;
339
340    fn create_visitor<'a>(
341        config: &'a ModuleConfig,
342        exports: &'a [ProgramExport],
343        basic_block_by_jump_table_index: &'a [u32],
344        jump_table_index_by_basic_block: &'a [u32],
345        init: GuestInit<'a>,
346        instruction_count: usize,
347        basic_block_count: usize,
348        debug_trace_execution: bool,
349    ) -> Result<(Self::BackendVisitor<'a>, Self::Aux), Error> {
350        let native_page_size = crate::sandbox::get_native_page_size();
351        if native_page_size > config.page_size as usize || config.page_size as usize % native_page_size != 0 {
352            return Err(format!("configured page size of {} is incompatible with the native page size of {}", config.page_size, native_page_size).into());
353        }
354
355        let address_space = S::reserve_address_space().map_err(Error::from_display)?;
356        let native_code_address = crate::sandbox::SandboxAddressSpace::native_code_address(&address_space);
357        let program_assembler = Compiler::new(
358            config,
359            exports,
360            basic_block_by_jump_table_index,
361            jump_table_index_by_basic_block,
362            S::KIND,
363            S::address_table(),
364            S::vmctx_regs_offset(),
365            S::vmctx_gas_offset(),
366            S::vmctx_heap_info_offset(),
367            debug_trace_execution,
368            native_code_address,
369            instruction_count,
370            basic_block_count,
371            init,
372        );
373
374        Ok((program_assembler, address_space))
375    }
376
377    fn finish_compilation<'a>(wrapper: VisitorWrapper<'a, Self::BackendVisitor<'a>>, address_space: Self::Aux) -> Result<(crate::api::Common<'a>, Self), Error> {
378        let result = wrapper.visitor.finalize(&wrapper.common.gas_cost_for_basic_block)?;
379
380        let init = SandboxInit {
381            guest_init: result.init,
382            code: &result.code,
383            jump_table: &result.jump_table,
384            sysreturn_address: result.sysreturn_address
385        };
386
387        let sandbox_program = S::prepare_program(init, address_space).map_err(Error::from_display)?;
388        let export_trampolines = result.export_trampolines;
389
390        let module = CompiledModule {
391            sandbox_program,
392            export_trampolines,
393            nth_instruction_to_code_offset_map: result.nth_instruction_to_code_offset_map,
394        };
395
396        Ok((wrapper.common, module))
397    }
398}
399
400pub(crate) struct CompiledModule<S> where S: Sandbox {
401    pub(crate) sandbox_program: S::Program,
402    pub(crate) export_trampolines: Vec<u64>,
403    nth_instruction_to_code_offset_map: Vec<u32>,
404}
405
406impl<S> CompiledModule<S> where S: Sandbox {
407    pub fn machine_code(&self) -> Cow<[u8]> {
408        self.sandbox_program.machine_code()
409    }
410
411    pub fn nth_instruction_to_code_offset_map(&self) -> &[u32] {
412        &self.nth_instruction_to_code_offset_map
413    }
414}