// polkavm_common/zygote.rs

//! This module defines the ABI boundary between the host and the zygote.
//!
//! In general everything here can be modified at will, provided the zygote
//! is recompiled.

use crate::abi::MemoryMap;
use core::cell::UnsafeCell;
use core::sync::atomic::{AtomicBool, AtomicU32, AtomicU64};

// Due to the limitations of Rust's compile-time constant evaluation machinery
// we need to define this struct multiple times.
macro_rules! define_address_table {
    ($($name:ident: $type:ty,)+) => {
        #[repr(C)]
        pub struct AddressTableRaw {
            $(pub $name: $type),+
        }

        #[derive(Copy, Clone)]
        #[repr(packed)]
        pub struct AddressTablePacked {
            $(pub $name: u64),+
        }

        #[derive(Copy, Clone)]
        pub struct AddressTable {
            $(pub $name: u64),+
        }

        impl AddressTable {
            #[inline]
            pub fn from_raw(table: AddressTableRaw) -> Self {
                Self {
                    $(
                        $name: table.$name as u64
                    ),+
                }
            }

            pub const fn from_packed(table: &AddressTablePacked) -> Self {
                Self {
                    $(
                        $name: table.$name
                    ),+
                }
            }
        }

        static_assert!(core::mem::size_of::<AddressTableRaw>() == core::mem::size_of::<AddressTablePacked>());
        static_assert!(core::mem::size_of::<AddressTableRaw>() == core::mem::size_of::<AddressTable>());
    }
}

// These are the addresses exported from the zygote.
define_address_table! {
    syscall_hostcall: unsafe extern "C" fn(u32),
    syscall_trap: unsafe extern "C" fn() -> !,
    syscall_return: unsafe extern "C" fn() -> !,
    syscall_trace: unsafe extern "C" fn(u32, u64),
    syscall_sbrk: unsafe extern "C" fn(u64) -> u32,
}
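
// For reference, the invocation above expands (roughly) to:
//
//     #[repr(C)]
//     pub struct AddressTableRaw {
//         pub syscall_hostcall: unsafe extern "C" fn(u32),
//         pub syscall_trap: unsafe extern "C" fn() -> !,
//         // ...one field per entry...
//     }
//
// along with the matching `AddressTablePacked` and `AddressTable` variants in
// which every field is a plain `u64` holding the corresponding address.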

/// The address where the native code starts inside of the VM.
///
/// This is not directly accessible by the program running inside of the VM.
pub const VM_ADDR_NATIVE_CODE: u64 = 0x100000000;

/// The address where the indirect jump table starts inside of the VM.
///
/// This is not directly accessible by the program running inside of the VM.
pub const VM_ADDR_JUMP_TABLE: u64 = 0x800000000;

/// The address where the return-to-host jump table vector physically resides.
///
/// (Jump table entries are 8 bytes each, which is why `VM_ADDR_RETURN_TO_HOST`
/// is shifted left by 3 to convert the entry index into a byte offset.)
pub const VM_ADDR_JUMP_TABLE_RETURN_TO_HOST: u64 = VM_ADDR_JUMP_TABLE + ((crate::abi::VM_ADDR_RETURN_TO_HOST as u64) << 3);

/// A special hostcall number set by the *host* to signal that the guest should stop executing the program.
pub const HOSTCALL_ABORT_EXECUTION: u32 = !0; // == 0xffffffff

/// A special hostcall number set by the *host* to signal that the guest should execute `sbrk`.
pub const HOSTCALL_SBRK: u32 = !0 - 1; // == 0xfffffffe

/// A sentinel value to indicate that the instruction counter is not available.
pub const SANDBOX_EMPTY_NTH_INSTRUCTION: u32 = !0;

/// A sentinel value to indicate that the native program counter is not available.
pub const SANDBOX_EMPTY_NATIVE_PROGRAM_COUNTER: u64 = 0;

/// The address of the global per-VM context struct.
pub const VM_ADDR_VMCTX: u64 = 0x400000000;

/// The address of the signal stack.
pub const VM_ADDR_SIGSTACK: u64 = 0x500000000;

/// The lowest address of the native stack.
pub const VM_ADDR_NATIVE_STACK_LOW: u64 = 0x600000000;

/// The size of the native stack.
pub const VM_ADDR_NATIVE_STACK_SIZE: u64 = 0x4000;

/// The address of the top of the native stack.
pub const VM_ADDR_NATIVE_STACK_HIGH: u64 = VM_ADDR_NATIVE_STACK_LOW + VM_ADDR_NATIVE_STACK_SIZE;
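
// With the values above, the native stack occupies the 16 KiB range
// `0x600000000..0x600004000`.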

/// The maximum number of native code bytes that can be emitted by a single VM instruction.
///
/// This does *not* affect the VM ABI and can be changed at will,
/// but should be high enough that it's never hit.
pub const VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH: u32 = 53;

/// The maximum number of native code bytes that can be emitted as an epilogue.
///
/// This does *not* affect the VM ABI and can be changed at will,
/// but should be high enough that it's never hit.
pub const VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH: u32 = 1024 * 1024;

/// The maximum size of the jump table, in bytes.
pub const VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE: u64 = (crate::abi::VM_MAXIMUM_INSTRUCTION_COUNT as u64 + 1)
    * core::mem::size_of::<u64>() as u64
    * crate::abi::VM_CODE_ADDRESS_ALIGNMENT as u64;

/// The maximum number of bytes the jump table can span in virtual memory.
pub const VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE: u64 = 0x100000000 * core::mem::size_of::<u64>() as u64;
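
// That is, enough room for one 8-byte entry for every possible 32-bit guest
// code address: 2^32 * 8 bytes = 32 GiB of virtual address space.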

/// The maximum size of the generated native code, in bytes.
pub const VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE: u32 = 512 * 1024 * 1024 - 1;

/// The memory configuration used by a given program and/or sandbox instance.
#[derive(Clone)]
#[repr(C)]
pub struct SandboxMemoryConfig {
    pub memory_map: MemoryMap,
    pub ro_data_fd_size: u32,
    pub rw_data_fd_size: u32,
    pub code_size: u32,
    pub jump_table_size: u32,
    pub sysreturn_address: u64,
}

/// A flag which will trigger the sandbox to reload its program before execution.
pub const VM_RPC_FLAG_RECONFIGURE: u32 = 1 << 0;

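/// One-time arguments used during initialization.
///
/// (These presumably record where the kernel placed the initial stack, vDSO
/// and vvar mappings so that the zygote can relocate or dispose of them.)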
#[repr(C)]
pub struct VmInit {
    pub stack_address: AtomicU64,
    pub stack_length: AtomicU64,
    pub vdso_address: AtomicU64,
    pub vdso_length: AtomicU64,
    pub vvar_address: AtomicU64,
    pub vvar_length: AtomicU64,
}

const MESSAGE_BUFFER_SIZE: usize = 512;

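/// A wrapper which aligns its contents to a 64-byte boundary, i.e. to a
/// typical cache line, presumably to prevent false sharing between the
/// hot fields it wraps and their neighbors.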
#[repr(align(64))]
pub struct CacheAligned<T>(pub T);

impl<T> core::ops::Deref for CacheAligned<T> {
    type Target = T;
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T> core::ops::DerefMut for CacheAligned<T> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

#[repr(C)]
pub struct VmCtxHeapInfo {
    pub heap_top: UnsafeCell<u64>,
    pub heap_threshold: UnsafeCell<u64>,
}

const REG_COUNT: usize = crate::program::Reg::ALL.len();

#[repr(C)]
pub struct VmCtxSyscall {
    // NOTE: The order of fields here can matter for performance!
    /// The current gas counter.
    pub gas: UnsafeCell<i64>,
    /// The hostcall number that was triggered.
    pub hostcall: UnsafeCell<u32>,
    /// A dump of all of the registers of the VM.
    pub regs: UnsafeCell<[u32; REG_COUNT]>,
    /// The number of the instruction just about to be executed.
    ///
    /// Should be treated as empty if equal to `SANDBOX_EMPTY_NTH_INSTRUCTION`.
    pub nth_instruction: UnsafeCell<u32>,

    /// The current RIP. Filled out in case of a trap or during tracing.
    ///
    /// Should be treated as empty if equal to `SANDBOX_EMPTY_NATIVE_PROGRAM_COUNTER`.
    pub rip: UnsafeCell<u64>,
}

#[repr(C)]
pub struct VmCtxCounters {
    pub syscall_wait_loop_start: UnsafeCell<u64>,
    pub syscall_futex_wait: UnsafeCell<u64>,
}

/// The virtual machine context.
///
/// This is mapped in shared memory; the sandbox keeps its state in it, and the
/// host uses it to communicate with the sandbox.
#[allow(clippy::partial_pub_fields)]
#[repr(C)]
pub struct VmCtx {
    /// Fields used when making syscalls from the VM into the host.
    syscall_ffi: CacheAligned<VmCtxSyscall>,

    /// The state of the program's heap.
    pub heap_info: VmCtxHeapInfo,

    /// The futex used to synchronize the sandbox with the host process.
    pub futex: CacheAligned<AtomicU32>,

    /// The address of the native code to call inside of the VM, if non-zero.
    pub rpc_address: UnsafeCell<u64>,
    /// Flags specifying what exactly the sandbox should do.
    pub rpc_flags: UnsafeCell<u32>,
    /// The amount of memory to allocate.
    pub rpc_sbrk: UnsafeCell<u32>,
    /// The memory configuration of the sandbox.
    pub memory_config: UnsafeCell<SandboxMemoryConfig>,
    /// Whether the memory of the sandbox is dirty.
    pub is_memory_dirty: AtomicBool,

    /// Performance counters. Only for debugging.
    pub counters: CacheAligned<VmCtxCounters>,

    /// One-time args used during initialization.
    pub init: VmInit,

    /// Length of the message in the message buffer.
    pub message_length: UnsafeCell<u32>,
    /// A buffer used to marshal error messages.
    pub message_buffer: UnsafeCell<[u8; MESSAGE_BUFFER_SIZE]>,
}

// Make sure it fits within a single page on amd64.
static_assert!(core::mem::size_of::<VmCtx>() <= 4096);

/// The VM is busy.
pub const VMCTX_FUTEX_BUSY: u32 = 0;

/// The VM is ready to be initialized.
pub const VMCTX_FUTEX_INIT: u32 = 1;

/// The VM is idle and is waiting for work.
pub const VMCTX_FUTEX_IDLE: u32 = 2;

/// The VM has triggered a host call.
pub const VMCTX_FUTEX_HOSTCALL: u32 = 3;

/// The VM has triggered a trap.
pub const VMCTX_FUTEX_TRAP: u32 = 4;
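
// A rough sketch of how the host side is expected to drive this state
// machine (hypothetical; the actual host implementation lives outside this
// crate and parks on the `futex` syscall instead of spinning):
//
//     fn wait_for_vm(vmctx: &VmCtx) -> u32 {
//         use core::sync::atomic::Ordering;
//         loop {
//             let state = vmctx.futex.load(Ordering::Acquire);
//             if state != VMCTX_FUTEX_BUSY {
//                 // One of VMCTX_FUTEX_IDLE, VMCTX_FUTEX_HOSTCALL,
//                 // VMCTX_FUTEX_TRAP, etc.
//                 return state;
//             }
//             core::hint::spin_loop();
//         }
//     }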

impl VmCtx {
    /// Creates a zeroed VM context.
    pub const fn zeroed() -> Self {
        VmCtx {
            futex: CacheAligned(AtomicU32::new(VMCTX_FUTEX_BUSY)),

            rpc_address: UnsafeCell::new(0),
            rpc_flags: UnsafeCell::new(0),
            rpc_sbrk: UnsafeCell::new(0),
            memory_config: UnsafeCell::new(SandboxMemoryConfig {
                memory_map: MemoryMap::empty(),
                ro_data_fd_size: 0,
                rw_data_fd_size: 0,
                code_size: 0,
                jump_table_size: 0,
                sysreturn_address: 0,
            }),
            is_memory_dirty: AtomicBool::new(false),

            syscall_ffi: CacheAligned(VmCtxSyscall {
                gas: UnsafeCell::new(0),
                hostcall: UnsafeCell::new(0),
                regs: UnsafeCell::new([0; REG_COUNT]),
                rip: UnsafeCell::new(0),
                nth_instruction: UnsafeCell::new(0),
            }),

            heap_info: VmCtxHeapInfo {
                heap_top: UnsafeCell::new(0),
                heap_threshold: UnsafeCell::new(0),
            },

            counters: CacheAligned(VmCtxCounters {
                syscall_wait_loop_start: UnsafeCell::new(0),
                syscall_futex_wait: UnsafeCell::new(0),
            }),

            init: VmInit {
                stack_address: AtomicU64::new(0),
                stack_length: AtomicU64::new(0),
                vdso_address: AtomicU64::new(0),
                vdso_length: AtomicU64::new(0),
                vvar_address: AtomicU64::new(0),
                vvar_length: AtomicU64::new(0),
            },

            message_length: UnsafeCell::new(0),
            message_buffer: UnsafeCell::new([0; MESSAGE_BUFFER_SIZE]),
        }
    }

    /// Creates a fresh VM context.
    ///
    /// Unlike [`Self::zeroed`], this marks the instruction counter as empty.
    pub const fn new() -> Self {
        let mut vmctx = Self::zeroed();
        vmctx.syscall_ffi.0.nth_instruction = UnsafeCell::new(SANDBOX_EMPTY_NTH_INSTRUCTION);
        vmctx
    }

    // Define some accessor methods so that we don't have to update the rest of the codebase
    // when we shuffle things around in the structure.

    #[inline(always)]
    pub const fn gas(&self) -> &UnsafeCell<i64> {
        &self.syscall_ffi.0.gas
    }

    #[inline(always)]
    pub const fn heap_info(&self) -> &VmCtxHeapInfo {
        &self.heap_info
    }

    #[inline(always)]
    pub const fn hostcall(&self) -> &UnsafeCell<u32> {
        &self.syscall_ffi.0.hostcall
    }

    #[inline(always)]
    pub const fn regs(&self) -> &UnsafeCell<[u32; REG_COUNT]> {
        &self.syscall_ffi.0.regs
    }

    #[inline(always)]
    pub const fn rip(&self) -> &UnsafeCell<u64> {
        &self.syscall_ffi.0.rip
    }

    #[inline(always)]
    pub const fn nth_instruction(&self) -> &UnsafeCell<u32> {
        &self.syscall_ffi.0.nth_instruction
    }
}
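
// For illustration: a freshly constructed context starts out in the
// `VMCTX_FUTEX_BUSY` state with an empty instruction counter, so something
// along these lines holds (hypothetical check, not taken from the source):
//
//     let vmctx = VmCtx::new();
//     assert_eq!(vmctx.futex.load(core::sync::atomic::Ordering::Relaxed), VMCTX_FUTEX_BUSY);
//     // SAFETY: nothing else has access to this freshly created `VmCtx`.
//     unsafe {
//         assert_eq!(*vmctx.nth_instruction().get(), SANDBOX_EMPTY_NTH_INSTRUCTION);
//         assert_eq!(*vmctx.hostcall().get(), 0);
//     }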

static_assert!(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST > VM_ADDR_JUMP_TABLE);
static_assert!(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST % 0x4000 == 0);
static_assert!(VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE <= VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE);
static_assert!(VM_ADDR_JUMP_TABLE + VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE < VM_ADDR_JUMP_TABLE_RETURN_TO_HOST);
static_assert!(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST < VM_ADDR_JUMP_TABLE + VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE);
static_assert!(VM_ADDR_JUMP_TABLE.count_ones() == 1);
static_assert!((1 << VM_ADDR_JUMP_TABLE.trailing_zeros()) == VM_ADDR_JUMP_TABLE);

static_assert!(
    VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE
        >= crate::abi::VM_MAXIMUM_INSTRUCTION_COUNT * VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH + VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH
);
static_assert!(VM_ADDR_NATIVE_CODE > 0xffffffff);
static_assert!(VM_ADDR_VMCTX > 0xffffffff);
static_assert!(VM_ADDR_NATIVE_STACK_LOW > 0xffffffff);