use std::borrow::Cow;
use std::collections::HashMap;

use polkavm_assembler::{Assembler, Label};
use polkavm_common::program::{ProgramExport, Instruction};
use polkavm_common::zygote::{
    AddressTable, VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH, VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH,
};
use polkavm_common::abi::VM_CODE_ADDRESS_ALIGNMENT;

use crate::api::VisitorWrapper;
use crate::error::{bail, Error};

use crate::sandbox::{Sandbox, SandboxProgram, SandboxInit};
use crate::config::{GasMeteringKind, ModuleConfig, SandboxKind};
use crate::utils::GuestInit;

#[cfg(target_arch = "x86_64")]
mod amd64;

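/// Compiles PolkaVM guest instructions into native machine code through the
/// `polkavm_assembler` backend.
///
/// Basic block labels are created lazily: a jump to a block which hasn't been
/// compiled yet gets a forward-declared label stashed in
/// `nth_basic_block_to_label_pending`, with `pending_label_count` tracking how
/// many are still unresolved. Any label still pending at `finalize` time means
/// the program referenced a non-existent jump target, which is reported as an
/// error.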
pub(crate) struct Compiler<'a> {
    asm: Assembler,
    exports: &'a [ProgramExport<'a>],
    basic_block_by_jump_table_index: &'a [u32],
    jump_table_index_by_basic_block: &'a [u32],
    nth_basic_block_to_label: Vec<Label>,
    nth_basic_block_to_label_pending: Vec<Option<Label>>,
    nth_basic_block_to_machine_code_offset: Vec<usize>,
    pending_label_count: usize,
    jump_table: Vec<u8>,
    export_to_label: HashMap<u32, Label>,
    export_trampolines: Vec<u64>,
    debug_trace_execution: bool,
    ecall_label: Label,
    trap_label: Label,
    trace_label: Label,
    jump_table_label: Label,
    sbrk_label: Label,
    sandbox_kind: SandboxKind,
    gas_metering: Option<GasMeteringKind>,
    native_code_address: u64,
    address_table: AddressTable,
    vmctx_regs_offset: usize,
    vmctx_gas_offset: usize,
    vmctx_heap_info_offset: usize,
    nth_instruction_to_code_offset_map: Vec<u32>,
    init: GuestInit<'a>,
    is_last_instruction: bool,
}

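/// The output of `Compiler::finalize`: everything the sandbox needs to map and
/// run the program, plus the instruction-to-code-offset map later exposed
/// through `CompiledModule`.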
struct CompilationResult<'a> {
    code: Vec<u8>,
    jump_table: Vec<u8>,
    export_trampolines: Vec<u64>,
    sysreturn_address: u64,
    nth_instruction_to_code_offset_map: Vec<u32>,
    init: GuestInit<'a>,
}

impl<'a> Compiler<'a> {
    #[allow(clippy::too_many_arguments)]
    fn new(
        config: &ModuleConfig,
        exports: &'a [ProgramExport<'a>],
        basic_block_by_jump_table_index: &'a [u32],
        jump_table_index_by_basic_block: &'a [u32],
        sandbox_kind: SandboxKind,
        address_table: AddressTable,
        vmctx_regs_offset: usize,
        vmctx_gas_offset: usize,
        vmctx_heap_info_offset: usize,
        debug_trace_execution: bool,
        native_code_address: u64,
        instruction_count: usize,
        basic_block_count: usize,
        init: GuestInit<'a>,
    ) -> Self {
        let mut asm = Assembler::new();
        let ecall_label = asm.forward_declare_label();
        let trap_label = asm.forward_declare_label();
        let trace_label = asm.forward_declare_label();
        let jump_table_label = asm.forward_declare_label();
        let sbrk_label = asm.forward_declare_label();

        let nth_basic_block_to_label = Vec::with_capacity(basic_block_count);
        let mut nth_basic_block_to_machine_code_offset = Vec::new();
        if config.gas_metering.is_some() {
            nth_basic_block_to_machine_code_offset.reserve(basic_block_count);
        }

        let mut nth_basic_block_to_label_pending = Vec::new();
        nth_basic_block_to_label_pending.resize(basic_block_count, None);

        let nth_instruction_to_code_offset_map: Vec<u32> = Vec::with_capacity(instruction_count + 1);
        polkavm_common::static_assert!(polkavm_common::zygote::VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE < u32::MAX);

        asm.reserve_code(instruction_count * 16);
        asm.reserve_labels(basic_block_count * 2);
        asm.reserve_fixups(basic_block_count);
        asm.set_origin(native_code_address);

        let mut compiler = Compiler {
            asm,
            exports,
            basic_block_by_jump_table_index,
            jump_table_index_by_basic_block,
            nth_basic_block_to_label,
            nth_basic_block_to_label_pending,
            nth_basic_block_to_machine_code_offset,
            pending_label_count: 0,
            jump_table: Default::default(),
            export_to_label: Default::default(),
            export_trampolines: Default::default(),
            ecall_label,
            trap_label,
            trace_label,
            jump_table_label,
            sbrk_label,
            sandbox_kind,
            gas_metering: config.gas_metering,
            native_code_address,
            debug_trace_execution,
            address_table,
            vmctx_regs_offset,
            vmctx_gas_offset,
            vmctx_heap_info_offset,
            nth_instruction_to_code_offset_map,
            init,
            is_last_instruction: instruction_count == 0,
        };

        compiler.start_new_basic_block();
        compiler
    }

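    /// Finishes compilation: patches the per-block gas costs into their stubs,
    /// emits the epilogue (trampolines and the sysreturn stub), builds the
    /// jump table, and resolves the native addresses of all exports.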
    fn finalize(mut self, gas_cost_for_basic_block: &[u32]) -> Result<CompilationResult<'a>, Error> {
        let epilogue_start = self.asm.len();
        self.nth_instruction_to_code_offset_map.push(epilogue_start as u32);

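        // Gas metering is two-phase: `start_new_basic_block` emits a stub with a
        // placeholder cost at the top of every block and records its machine code
        // offset, and only here - once the visitor has tallied each block's real
        // cost - is the placeholder patched via the architecture-specific
        // `emit_weight`. As a rough sketch of what the patch amounts to (the real
        // encoding lives in the arch module; `patch_u32` and `STUB_IMM_OFFSET`
        // below are hypothetical, for illustration only):
        //
        //     fn emit_weight(&mut self, offset: usize, cost: u32) {
        //         // Overwrite the immediate of the already-emitted
        //         // "subtract cost from the vmctx gas counter" instruction.
        //         self.asm.patch_u32(offset + STUB_IMM_OFFSET, cost);
        //     }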
        if self.gas_metering.is_some() {
            log::trace!("Finalizing block costs...");
            assert_eq!(gas_cost_for_basic_block.len(), self.nth_basic_block_to_machine_code_offset.len());
            let nth_basic_block_to_machine_code_offset = core::mem::take(&mut self.nth_basic_block_to_machine_code_offset);
            for (offset, &cost) in nth_basic_block_to_machine_code_offset.into_iter().zip(gas_cost_for_basic_block.iter()) {
                self.emit_weight(offset, cost);
            }
        }

        log::trace!("Emitting trampolines");

        if self.debug_trace_execution {
            self.emit_trace_trampoline();
        }

        self.emit_trap_trampoline();
        self.emit_ecall_trampoline();
        self.emit_sbrk_trampoline();
        self.emit_export_trampolines();

        let label_sysreturn = self.emit_sysreturn();

        if self.pending_label_count > 0 {
            bail!("program is missing {} jump target(s)", self.pending_label_count);
        }

165
166 let native_pointer_size = core::mem::size_of::<usize>();
167 let jump_table_entry_size = native_pointer_size * VM_CODE_ADDRESS_ALIGNMENT as usize;
168 self.jump_table.resize(self.basic_block_by_jump_table_index.len() * jump_table_entry_size, 0);
169
170 assert_eq!(self.basic_block_by_jump_table_index[0], u32::MAX);
172
173 for (jump_table_index, nth_basic_block) in self.basic_block_by_jump_table_index.iter().copied().enumerate().skip(1) {
174 let label = self.nth_basic_block_to_label[nth_basic_block as usize];
175 let offset = jump_table_index * jump_table_entry_size;
176 let range = offset..offset + native_pointer_size;
177 let address = self.native_code_address
178 .checked_add_signed(self.asm.get_label_origin_offset_or_panic(label) as i64)
179 .expect("overflow");
180
181 log::trace!("Jump table: [0x{:x}] = 0x{:x}", self.native_code_address + range.start as u64, address);
182 self.jump_table[range].copy_from_slice(&address.to_ne_bytes());
183 }
184
185 self.export_trampolines.reserve(self.exports.len());
186 for export in self.exports {
187 let label = self.export_to_label.get(&export.jump_target()).unwrap();
188 let native_address = self.native_code_address
189 .checked_add_signed(self.asm.get_label_origin_offset_or_panic(*label) as i64)
190 .expect("overflow");
191 self.export_trampolines.push(native_address);
192 }
193
194 let epilogue_length = self.asm.len() - epilogue_start;
195 assert!(
196 epilogue_length <= VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH as usize,
197 "maximum epilogue length of {} exceeded with {} bytes",
198 VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH,
199 epilogue_length
200 );
201
202 let sysreturn_address = self.native_code_address
203 .checked_add_signed(self.asm.get_label_origin_offset_or_panic(label_sysreturn) as i64)
204 .expect("overflow");
205
206 match self.sandbox_kind {
207 SandboxKind::Linux => {},
208 SandboxKind::Generic => {
209 let native_page_size = crate::sandbox::get_native_page_size();
210 let padded_length = polkavm_common::utils::align_to_next_page_usize(native_page_size, self.asm.len()).unwrap();
211 self.asm.resize(padded_length, Self::PADDING_BYTE);
212 self.asm.define_label(self.jump_table_label);
213 }
214 }
215
216 let code = self.asm.finalize();
217 Ok(CompilationResult {
218 code: code.into(),
219 jump_table: self.jump_table,
220 export_trampolines: self.export_trampolines,
221 sysreturn_address,
222 nth_instruction_to_code_offset_map: self.nth_instruction_to_code_offset_map,
223 init: self.init,
224 })
225 }
226
    #[inline(always)]
    fn push<T>(&mut self, inst: polkavm_assembler::Instruction<T>) where T: core::fmt::Display {
        self.asm.push(inst);
    }

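    /// Returns the label for the given basic block, forward-declaring it (and
    /// counting it as pending) if that block hasn't been compiled yet.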
    fn get_or_forward_declare_label(&mut self, nth_basic_block: u32) -> Label {
        match self.nth_basic_block_to_label.get(nth_basic_block as usize) {
            Some(label) => *label,
            None => {
                // Grow the pending list first so the index below can't go out
                // of bounds for jump targets beyond the declared block count.
                if nth_basic_block as usize >= self.nth_basic_block_to_label_pending.len() {
                    self.nth_basic_block_to_label_pending.resize(nth_basic_block as usize + 1, None);
                }

                match self.nth_basic_block_to_label_pending[nth_basic_block as usize] {
                    Some(label) => label,
                    None => {
                        let label = self.asm.forward_declare_label();
                        self.nth_basic_block_to_label_pending[nth_basic_block as usize] = Some(label);
                        self.pending_label_count += 1;
                        label
                    }
                }
            },
        }
    }

    fn define_label(&mut self, label: Label) {
        log::trace!("Label: {}", label);
        self.asm.define_label(label);
    }

    #[inline(always)]
    fn next_basic_block(&self) -> u32 {
        self.nth_basic_block_to_label.len() as u32
    }

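    /// Starts a new basic block at the current code offset: resolves a pending
    /// forward-declared label if one exists (otherwise declares a fresh one),
    /// and, if gas metering is enabled, records the block's offset and emits a
    /// metering stub for `finalize` to patch later.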
    fn start_new_basic_block(&mut self) {
        if self.is_last_instruction {
            return;
        }

        let nth_basic_block = self.nth_basic_block_to_label.len();
        log::trace!("Starting new basic block: @{nth_basic_block:x}");

        let label = if let Some(label) = self.nth_basic_block_to_label_pending.get_mut(nth_basic_block).and_then(|value| value.take()) {
            self.pending_label_count -= 1;
            label
        } else {
            self.asm.forward_declare_label()
        };

        self.define_label(label);
        self.nth_basic_block_to_label.push(label);

        if let Some(gas_metering) = self.gas_metering {
            let offset = self.asm.len();
            self.nth_basic_block_to_machine_code_offset.push(offset);
            self.emit_gas_metering_stub(gas_metering);
        }
    }
}

impl<'a> VisitorWrapper<'a, Compiler<'a>> {
    fn current_instruction(&self) -> Instruction {
        Instruction::deserialize(&self.common.code[self.common.current_instruction_offset..])
            .expect("failed to deserialize instruction")
            .1
    }

    #[cold]
    fn panic_on_too_long_instruction(&self, instruction_length: usize) -> ! {
        panic!(
            "maximum instruction length of {} exceeded with {} bytes for instruction: {}",
            VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH,
            instruction_length,
            self.current_instruction(),
        );
    }

    #[cold]
    fn trace_compiled_instruction(&self) {
        log::trace!(
            "Compiling {}/{}: {}",
            self.common.nth_instruction + 1,
            self.common.instruction_count,
            self.current_instruction()
        );
    }
}

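// `before_instruction` and `after_instruction` bracket the compilation of each
// guest instruction: the former records the current machine code offset, and
// the latter uses that offset to enforce the
// `VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH` limit from `polkavm_common::zygote`.
// The check is skipped when debug tracing is enabled, presumably because the
// injected trace calls inflate per-instruction lengths. As a sketch of the
// resulting invariant over the finished offset map (illustrative, not literal
// code from this crate): for every instruction `n`,
//
//     assert!(map[n + 1] - map[n] <= VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH as u32);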
impl<'a> crate::api::BackendVisitor for VisitorWrapper<'a, Compiler<'a>> {
    #[inline(always)]
    fn before_instruction(&mut self) {
        let initial_length = self.visitor.asm.len();
        self.nth_instruction_to_code_offset_map.push(initial_length as u32);

        if log::log_enabled!(log::Level::Trace) {
            self.trace_compiled_instruction();
        }

        if self.debug_trace_execution {
            self.visitor.trace_execution(self.common.nth_instruction);
        }

        self.is_last_instruction = self.common.is_last_instruction();
        self.asm.reserve::<8>();
    }

    fn after_instruction(&mut self) {
        if !self.debug_trace_execution {
            let offset = *self.nth_instruction_to_code_offset_map.last().unwrap() as usize;
            let instruction_length = self.asm.len() - offset;
            if instruction_length > VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH as usize {
                self.panic_on_too_long_instruction(instruction_length)
            }
        }
    }
}

impl<S> crate::api::BackendModule for CompiledModule<S> where S: Sandbox {
    type BackendVisitor<'a> = Compiler<'a>;
    type Aux = S::AddressSpace;

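    /// Reserves the sandbox's address space and constructs a `Compiler`
    /// targeting it.
    ///
    /// The configured guest page size must be a multiple of, and no smaller
    /// than, the host's native page size: e.g. a 16384-byte guest page size is
    /// accepted on a 4096-byte-page host, while 4096 is rejected on a
    /// 16384-byte-page host.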
    fn create_visitor<'a>(
        config: &'a ModuleConfig,
        exports: &'a [ProgramExport],
        basic_block_by_jump_table_index: &'a [u32],
        jump_table_index_by_basic_block: &'a [u32],
        init: GuestInit<'a>,
        instruction_count: usize,
        basic_block_count: usize,
        debug_trace_execution: bool,
    ) -> Result<(Self::BackendVisitor<'a>, Self::Aux), Error> {
        let native_page_size = crate::sandbox::get_native_page_size();
        if native_page_size > config.page_size as usize || config.page_size as usize % native_page_size != 0 {
            return Err(format!(
                "configured page size of {} is incompatible with the native page size of {}",
                config.page_size, native_page_size
            ).into());
        }

        let address_space = S::reserve_address_space().map_err(Error::from_display)?;
        let native_code_address = crate::sandbox::SandboxAddressSpace::native_code_address(&address_space);
        let compiler = Compiler::new(
            config,
            exports,
            basic_block_by_jump_table_index,
            jump_table_index_by_basic_block,
            S::KIND,
            S::address_table(),
            S::vmctx_regs_offset(),
            S::vmctx_gas_offset(),
            S::vmctx_heap_info_offset(),
            debug_trace_execution,
            native_code_address,
            instruction_count,
            basic_block_count,
            init,
        );

        Ok((compiler, address_space))
    }

    fn finish_compilation<'a>(wrapper: VisitorWrapper<'a, Self::BackendVisitor<'a>>, address_space: Self::Aux) -> Result<(crate::api::Common<'a>, Self), Error> {
        let result = wrapper.visitor.finalize(&wrapper.common.gas_cost_for_basic_block)?;

        let init = SandboxInit {
            guest_init: result.init,
            code: &result.code,
            jump_table: &result.jump_table,
            sysreturn_address: result.sysreturn_address,
        };

        let sandbox_program = S::prepare_program(init, address_space).map_err(Error::from_display)?;
        let export_trampolines = result.export_trampolines;

        let module = CompiledModule {
            sandbox_program,
            export_trampolines,
            nth_instruction_to_code_offset_map: result.nth_instruction_to_code_offset_map,
        };

        Ok((wrapper.common, module))
    }
}

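/// A compiled module, ready to be instantiated: the machine code prepared as a
/// sandbox-specific program, the native addresses of the export trampolines,
/// and the per-instruction machine code offsets.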
pub(crate) struct CompiledModule<S> where S: Sandbox {
    pub(crate) sandbox_program: S::Program,
    pub(crate) export_trampolines: Vec<u64>,
    nth_instruction_to_code_offset_map: Vec<u32>,
}

impl<S> CompiledModule<S> where S: Sandbox {
    pub fn machine_code(&self) -> Cow<[u8]> {
        self.sandbox_program.machine_code()
    }

    pub fn nth_instruction_to_code_offset_map(&self) -> &[u32] {
        &self.nth_instruction_to_code_offset_map
    }
}