wasmtime_runtime/cow.rs
1//! Copy-on-write initialization support: creation of backing images for
2//! modules, and logic to support mapping these backing images into memory.
3
4#![cfg_attr(not(unix), allow(unused_imports, unused_variables))]
5
6use crate::MmapVec;
7use anyhow::Result;
8use libc::c_void;
9use std::fs::File;
10use std::sync::Arc;
11use std::{convert::TryFrom, ops::Range};
12use wasmtime_environ::{
13 DefinedMemoryIndex, MemoryInitialization, MemoryPlan, MemoryStyle, Module, PrimaryMap,
14};
15
/// Backing images for memories in a module.
///
/// This is meant to be built once, when a module is first loaded/constructed,
/// and then used many times for instantiation.
pub struct ModuleMemoryImages {
    // One optional image per defined memory; `None` means that memory has no
    // static initialization image and starts as all zeros.
    memories: PrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryImage>>>,
}
23
24impl ModuleMemoryImages {
25 /// Get the MemoryImage for a given memory.
26 pub fn get_memory_image(&self, defined_index: DefinedMemoryIndex) -> Option<&Arc<MemoryImage>> {
27 self.memories[defined_index].as_ref()
28 }
29}
30
/// One backing image for one memory.
#[derive(Debug, PartialEq)]
pub struct MemoryImage {
    /// The file descriptor source of this image.
    ///
    /// This might be an mmaped `*.cwasm` file or on Linux it could also be a
    /// `Memfd` as an anonymous file in memory. In either case this is used as
    /// the backing-source for the CoW image.
    fd: FdSource,

    /// Length of image, in bytes.
    ///
    /// Note that initial memory size may be larger; leading and trailing zeroes
    /// are truncated (handled by backing fd).
    ///
    /// Must be a multiple of the system page size.
    len: usize,

    /// Image starts this many bytes into `fd` source.
    ///
    /// This is 0 for anonymous-backed memfd files and is the offset of the data
    /// section in a `*.cwasm` file for `*.cwasm`-backed images.
    ///
    /// Must be a multiple of the system page size.
    fd_offset: u64,

    /// Image starts this many bytes into heap space.
    ///
    /// Must be a multiple of the system page size.
    linear_memory_offset: usize,
}
62
/// The file-descriptor backing a `MemoryImage`.
///
/// Note that no variants exist on non-unix platforms, which makes this type
/// uninhabited there — hence the empty `match self.fd {}` arms elsewhere in
/// this file.
#[derive(Debug)]
enum FdSource {
    /// An mmap'd file on disk, e.g. the original `*.cwasm` file.
    #[cfg(unix)]
    Mmap(Arc<File>),
    /// A Linux anonymous in-memory file created with `memfd_create(2)`.
    #[cfg(target_os = "linux")]
    Memfd(memfd::Memfd),
}
70
impl FdSource {
    /// Returns the underlying `File` for this source, regardless of which
    /// variant backs it.
    #[cfg(unix)]
    fn as_file(&self) -> &File {
        match self {
            FdSource::Mmap(ref file) => file,
            #[cfg(target_os = "linux")]
            FdSource::Memfd(ref memfd) => memfd.as_file(),
        }
    }
}
81
impl PartialEq for FdSource {
    fn eq(&self, other: &FdSource) -> bool {
        cfg_if::cfg_if! {
            if #[cfg(unix)] {
                use rustix::fd::AsRawFd;
                // Two sources are considered the same image iff they refer to
                // the same open file; raw-fd comparison suffices because these
                // fds are owned for the lifetime of the image.
                self.as_file().as_raw_fd() == other.as_file().as_raw_fd()
            } else {
                // `FdSource` is uninhabited off-unix: `drop` silences the
                // unused-variable warning and the empty match proves to the
                // compiler that this branch can never execute.
                drop(other);
                match *self {}
            }
        }
    }
}
95
impl MemoryImage {
    /// Attempts to build an image for `data`, which lives `offset` bytes into
    /// linear memory, optionally backed by the `mmap` the module data came
    /// from.
    ///
    /// Returns `Ok(None)` when no mmap-compatible representation is available
    /// on this platform, and `Err` for I/O failures (e.g. memfd creation or
    /// writing).
    fn new(
        page_size: u32,
        offset: u64,
        data: &[u8],
        mmap: Option<&MmapVec>,
    ) -> Result<Option<MemoryImage>> {
        // Sanity-check that various parameters are page-aligned.
        let len = data.len();
        assert_eq!(offset % u64::from(page_size), 0);
        assert_eq!((len as u32) % page_size, 0);
        // If the 64-bit offset doesn't fit in `usize` (e.g. a 4GiB+ offset on
        // a 32-bit host) then no image can be created.
        let linear_memory_offset = match usize::try_from(offset) {
            Ok(offset) => offset,
            Err(_) => return Ok(None),
        };

        // If a backing `mmap` is present then `data` should be a sub-slice of
        // the `mmap`. The sanity-checks here double-check that. Additionally
        // compilation should have ensured that the `data` section is
        // page-aligned within `mmap`, so that's also all double-checked here.
        //
        // Finally if the `mmap` itself comes from a backing file on disk, such
        // as a `*.cwasm` file, then that's a valid source of data for the
        // memory image so we simply return referencing that.
        //
        // Note that this path is platform-agnostic in the sense of all
        // platforms we support support memory mapping copy-on-write data from
        // files, but for now this is still a Linux-specific region of Wasmtime.
        // Some work will be needed to get this file compiling for macOS and
        // Windows.
        #[cfg(not(windows))]
        if let Some(mmap) = mmap {
            let start = mmap.as_ptr() as usize;
            let end = start + mmap.len();
            let data_start = data.as_ptr() as usize;
            let data_end = data_start + data.len();
            assert!(start <= data_start && data_end <= end);
            assert_eq!((start as u32) % page_size, 0);
            assert_eq!((data_start as u32) % page_size, 0);
            assert_eq!((data_end as u32) % page_size, 0);
            assert_eq!((mmap.original_offset() as u32) % page_size, 0);

            if let Some(file) = mmap.original_file() {
                return Ok(Some(MemoryImage {
                    fd: FdSource::Mmap(file.clone()),
                    // Offset of `data` within the original backing file is the
                    // mmap's own file offset plus data's offset into the mmap.
                    fd_offset: u64::try_from(mmap.original_offset() + (data_start - start))
                        .unwrap(),
                    linear_memory_offset,
                    len,
                }));
            }
        }

        // If `mmap` doesn't come from a file then platform-specific mechanisms
        // may be used to place the data in a form that's amenable to an mmap.

        cfg_if::cfg_if! {
            if #[cfg(target_os = "linux")] {
                // On Linux `memfd_create` is used to create an anonymous
                // in-memory file to represent the heap image. This anonymous
                // file is then used as the basis for further mmaps.

                use std::io::Write;

                let memfd = create_memfd()?;
                memfd.as_file().write_all(data)?;

                // Seal the memfd's data and length.
                //
                // This is a defense-in-depth security mitigation. The
                // memfd will serve as the starting point for the heap of
                // every instance of this module. If anything were to
                // write to this, it could affect every execution. The
                // memfd object itself is owned by the machinery here and
                // not exposed elsewhere, but it is still an ambient open
                // file descriptor at the syscall level, so some other
                // vulnerability that allowed writes to arbitrary fds
                // could modify it. Or we could have some issue with the
                // way that we map it into each instance. To be
                // extra-super-sure that it never changes, and because
                // this costs very little, we use the kernel's "seal" API
                // to make the memfd image permanently read-only.
                memfd.add_seals(&[
                    memfd::FileSeal::SealGrow,
                    memfd::FileSeal::SealShrink,
                    memfd::FileSeal::SealWrite,
                    memfd::FileSeal::SealSeal,
                ])?;

                Ok(Some(MemoryImage {
                    fd: FdSource::Memfd(memfd),
                    fd_offset: 0,
                    linear_memory_offset,
                    len,
                }))
            } else {
                // Other platforms don't have an easily available way of
                // representing the heap image as an mmap-source right now. We
                // could theoretically create a file and immediately unlink it
                // but that means that data may likely be preserved to disk
                // which isn't what we want here.
                Ok(None)
            }
        }
    }

    /// Maps this image copy-on-write into the linear memory whose base
    /// address is `base`.
    ///
    /// # Safety
    ///
    /// `base + self.linear_memory_offset .. + self.len` must be a valid,
    /// owned virtual memory reservation: `MAP_FIXED` unconditionally replaces
    /// any existing mapping in that range.
    unsafe fn map_at(&self, base: usize) -> Result<()> {
        cfg_if::cfg_if! {
            if #[cfg(unix)] {
                // Private (copy-on-write) read/write mapping of the backing
                // fd placed exactly at the image's offset within the heap.
                let ptr = rustix::mm::mmap(
                    (base + self.linear_memory_offset) as *mut c_void,
                    self.len,
                    rustix::mm::ProtFlags::READ | rustix::mm::ProtFlags::WRITE,
                    rustix::mm::MapFlags::PRIVATE | rustix::mm::MapFlags::FIXED,
                    self.fd.as_file(),
                    self.fd_offset,
                )?;
                // MAP_FIXED guarantees placement at the requested address.
                assert_eq!(ptr as usize, base + self.linear_memory_offset);
                Ok(())
            } else {
                // `FdSource` is uninhabited off-unix so this is unreachable.
                match self.fd {}
            }
        }
    }

    /// Replaces the image's mapping within the heap at `base` with anonymous
    /// zero memory, undoing a prior `map_at`.
    ///
    /// # Safety
    ///
    /// Same contract as `map_at`: the target range must be a valid, owned
    /// reservation since `MAP_FIXED` replaces existing mappings.
    unsafe fn remap_as_zeros_at(&self, base: usize) -> Result<()> {
        cfg_if::cfg_if! {
            if #[cfg(unix)] {
                let ptr = rustix::mm::mmap_anonymous(
                    (base + self.linear_memory_offset) as *mut c_void,
                    self.len,
                    rustix::mm::ProtFlags::READ | rustix::mm::ProtFlags::WRITE,
                    rustix::mm::MapFlags::PRIVATE | rustix::mm::MapFlags::FIXED,
                )?;
                // MAP_FIXED guarantees placement at the requested address.
                assert_eq!(ptr as usize, base + self.linear_memory_offset);
                Ok(())
            } else {
                // `FdSource` is uninhabited off-unix so this is unreachable.
                match self.fd {}
            }
        }
    }
}
238
239#[cfg(target_os = "linux")]
240fn create_memfd() -> Result<memfd::Memfd> {
241 // Create the memfd. It needs a name, but the
242 // documentation for `memfd_create()` says that names can
243 // be duplicated with no issues.
244 memfd::MemfdOptions::new()
245 .allow_sealing(true)
246 .create("wasm-memory-image")
247 .map_err(|e| e.into())
248}
249
impl ModuleMemoryImages {
    /// Create a new `ModuleMemoryImages` for the given module. This can be
    /// passed in as part of a `InstanceAllocationRequest` to speed up
    /// instantiation and execution by using copy-on-write-backed memories.
    ///
    /// Returns `Ok(None)` when images can't be used for this module, e.g.
    /// when memory initialization isn't static, a memory is imported, or a
    /// platform image couldn't be built for some memory.
    pub fn new(
        module: &Module,
        wasm_data: &[u8],
        mmap: Option<&MmapVec>,
    ) -> Result<Option<ModuleMemoryImages>> {
        // Images are only supported with the "static" initialization
        // precomputed at compile time; anything else bails out.
        let map = match &module.memory_initialization {
            MemoryInitialization::Static { map } => map,
            _ => return Ok(None),
        };
        let mut memories = PrimaryMap::with_capacity(map.len());
        let page_size = crate::page_size() as u32;
        for (memory_index, init) in map {
            // mmap-based-initialization only works for defined memories with a
            // known starting point of all zeros, so bail out if the memory is
            // imported.
            let defined_memory = match module.defined_memory_index(memory_index) {
                Some(idx) => idx,
                None => return Ok(None),
            };

            // If there's no initialization for this memory known then we don't
            // need an image for the memory so push `None` and move on.
            let init = match init {
                Some(init) => init,
                None => {
                    memories.push(None);
                    continue;
                }
            };

            // Get the image for this wasm module as a subslice of `wasm_data`,
            // and then use that to try to create the `MemoryImage`. If this
            // creation fails then we fail creating `ModuleMemoryImages` since
            // this memory couldn't be represented.
            let data = &wasm_data[init.data.start as usize..init.data.end as usize];
            let image = match MemoryImage::new(page_size, init.offset, data, mmap)? {
                Some(image) => image,
                None => return Ok(None),
            };

            // Defined-memory indices are visited in order, so pushing here
            // must line up with `defined_memory`.
            let idx = memories.push(Some(Arc::new(image)));
            assert_eq!(idx, defined_memory);
        }

        Ok(Some(ModuleMemoryImages { memories }))
    }
}
301
302/// Slot management of a copy-on-write image which can be reused for the pooling
303/// allocator.
304///
305/// This data structure manages a slot of linear memory, primarily in the
306/// pooling allocator, which optionally has a contiguous memory image in the
307/// middle of it. Pictorially this data structure manages a virtual memory
308/// region that looks like:
309///
310/// ```text
311/// +--------------------+-------------------+--------------+--------------+
312/// | anonymous | optional | anonymous | PROT_NONE |
313/// | zero | memory | zero | memory |
314/// | memory | image | memory | |
315/// +--------------------+-------------------+--------------+--------------+
316/// | <------+---------->
317/// |<-----+------------> \
318/// | \ image.len
319/// | \
320/// | image.linear_memory_offset
321/// |
322/// \
323/// self.base is this virtual address
324///
325/// <------------------+------------------------------------------------>
326/// \
327/// static_size
328///
329/// <------------------+---------------------------------->
330/// \
331/// accessible
332/// ```
333///
334/// When a `MemoryImageSlot` is created it's told what the `static_size` and
335/// `accessible` limits are. Initially there is assumed to be no image in linear
336/// memory.
337///
338/// When `MemoryImageSlot::instantiate` is called then the method will perform
339/// a "synchronization" to take the image from its prior state to the new state
340/// for the image specified. The first instantiation for example will mmap the
341/// heap image into place. Upon reuse of a slot nothing happens except possibly
342/// shrinking `self.accessible`. When a new image is used then the old image is
343/// mapped to anonymous zero memory and then the new image is mapped in place.
344///
345/// A `MemoryImageSlot` is either `dirty` or it isn't. When a `MemoryImageSlot`
346/// is dirty then it is assumed that any memory beneath `self.accessible` could
347/// have any value. Instantiation cannot happen into a `dirty` slot, however, so
348/// the `MemoryImageSlot::clear_and_remain_ready` returns this memory back to
349/// its original state to mark `dirty = false`. This is done by resetting all
350/// anonymous memory back to zero and the image itself back to its initial
351/// contents.
352///
353/// On Linux this is achieved with the `madvise(MADV_DONTNEED)` syscall. This
354/// syscall will release the physical pages back to the OS but retain the
355/// original mappings, effectively resetting everything back to its initial
356/// state. Non-linux platforms will replace all memory below `self.accessible`
357/// with a fresh zero'd mmap, meaning that reuse is effectively not supported.
358#[derive(Debug)]
359pub struct MemoryImageSlot {
360 /// The base address in virtual memory of the actual heap memory.
361 ///
362 /// Bytes at this address are what is seen by the Wasm guest code.
363 ///
364 /// Note that this is stored as `usize` instead of `*mut u8` to not deal
365 /// with `Send`/`Sync.
366 base: usize,
367
368 /// The maximum static memory size which `self.accessible` can grow to.
369 static_size: usize,
370
371 /// An optional image that is currently being used in this linear memory.
372 ///
373 /// This can be `None` in which case memory is originally all zeros. When
374 /// `Some` the image describes where it's located within the image.
375 image: Option<Arc<MemoryImage>>,
376
377 /// The size of the heap that is readable and writable.
378 ///
379 /// Note that this may extend beyond the actual linear memory heap size in
380 /// the case of dynamic memories in use. Memory accesses to memory below
381 /// `self.accessible` may still page fault as pages are lazily brought in
382 /// but the faults will always be resolved by the kernel.
383 accessible: usize,
384
385 /// Whether this slot may have "dirty" pages (pages written by an
386 /// instantiation). Set by `instantiate()` and cleared by
387 /// `clear_and_remain_ready()`, and used in assertions to ensure
388 /// those methods are called properly.
389 ///
390 /// Invariant: if !dirty, then this memory slot contains a clean
391 /// CoW mapping of `image`, if `Some(..)`, and anonymous-zero
392 /// memory beyond the image up to `static_size`. The addresses
393 /// from offset 0 to `self.accessible` are R+W and set to zero or the
394 /// initial image content, as appropriate. Everything between
395 /// `self.accessible` and `self.static_size` is inaccessible.
396 dirty: bool,
397
398 /// Whether this MemoryImageSlot is responsible for mapping anonymous
399 /// memory (to hold the reservation while overwriting mappings
400 /// specific to this slot) in place when it is dropped. Default
401 /// on, unless the caller knows what they are doing.
402 clear_on_drop: bool,
403}
404
impl MemoryImageSlot {
    /// Create a new MemoryImageSlot. Assumes that there is an anonymous
    /// mmap backing in the given range to start.
    ///
    /// The `accessible` parameter describes how much of linear memory is
    /// already mapped as R/W with all zero-bytes. The `static_size` value is
    /// the maximum size of this image which `accessible` cannot grow beyond,
    /// and all memory from `accessible` to `static_size` should be mapped as
    /// `PROT_NONE` backed by zero-bytes.
    pub(crate) fn create(base_addr: *mut c_void, accessible: usize, static_size: usize) -> Self {
        let base = base_addr as usize;
        MemoryImageSlot {
            base,
            static_size,
            accessible,
            image: None,
            dirty: false,
            clear_on_drop: true,
        }
    }

    /// Creates a zero-sized placeholder slot which manages no memory and
    /// performs no cleanup on drop.
    #[cfg(feature = "pooling-allocator")]
    pub(crate) fn dummy() -> MemoryImageSlot {
        MemoryImageSlot {
            base: 0,
            static_size: 0,
            image: None,
            accessible: 0,
            dirty: false,
            clear_on_drop: false,
        }
    }

    /// Inform the MemoryImageSlot that it should *not* clear the underlying
    /// address space when dropped. This should be used only when the
    /// caller will clear or reuse the address space in some other
    /// way.
    pub(crate) fn no_clear_on_drop(&mut self) {
        self.clear_on_drop = false;
    }

    /// Grows the read/write region of this slot to at least `size_bytes`.
    ///
    /// `size_bytes` must not exceed `self.static_size`; no-op when the region
    /// is already at least that large.
    pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> {
        assert!(size_bytes <= self.static_size);

        // If the heap limit already addresses accessible bytes then no syscalls
        // are necessary since the data is already mapped into the process and
        // waiting to go.
        //
        // This is used for "dynamic" memories where memory is not always
        // decommitted during recycling (but it's still always reset).
        if size_bytes <= self.accessible {
            return Ok(());
        }

        // Otherwise use `mprotect` to make the new pages read/write.
        self.set_protection(self.accessible..size_bytes, true)?;
        self.accessible = size_bytes;

        Ok(())
    }

    /// Prepares this slot for the instantiation of a new instance with the
    /// provided linear memory image.
    ///
    /// The `initial_size_bytes` parameter indicates the required initial size
    /// of the heap for the instance. The `maybe_image` is an optional initial
    /// image for linear memory to contain. The `style` is the way compiled
    /// code will be accessing this memory.
    ///
    /// The purpose of this method is to take a previously pristine slot
    /// (`!self.dirty`) and transform its prior state into state necessary for
    /// the given parameters. This could include, for example:
    ///
    /// * More memory may be made read/write if `initial_size_bytes` is larger
    ///   than `self.accessible`.
    /// * For `MemoryStyle::Static` linear memory may be made `PROT_NONE` if
    ///   `self.accessible` is larger than `initial_size_bytes`.
    /// * If no image was previously in place or if the wrong image was
    ///   previously in place then `mmap` may be used to setup the initial
    ///   image.
    pub(crate) fn instantiate(
        &mut self,
        initial_size_bytes: usize,
        maybe_image: Option<&Arc<MemoryImage>>,
        plan: &MemoryPlan,
    ) -> Result<()> {
        assert!(!self.dirty);
        assert!(initial_size_bytes <= self.static_size);

        // First order of business is to blow away the previous linear memory
        // image if it doesn't match the image specified here. If one is
        // detected then it's reset with anonymous memory which means that all
        // of memory up to `self.accessible` will now be read/write and zero.
        //
        // Note that this is intentionally a "small mmap" which only covers the
        // extent of the prior initialization image in order to preserve
        // resident memory that might come before or after the image.
        if self.image.as_ref() != maybe_image {
            self.remove_image()?;
        }

        // The next order of business is to ensure that `self.accessible` is
        // appropriate. First up is to grow the read/write portion of memory if
        // it's not large enough to accommodate `initial_size_bytes`.
        if self.accessible < initial_size_bytes {
            self.set_protection(self.accessible..initial_size_bytes, true)?;
            self.accessible = initial_size_bytes;
        }

        // If (1) the accessible region is not in its initial state, and (2) the
        // memory relies on virtual memory at all (i.e. has offset guard pages
        // and/or is static), then we need to reset memory protections. Put
        // another way, the only time it is safe to not reset protections is
        // when we are using dynamic memory without any guard pages.
        if initial_size_bytes < self.accessible
            && (plan.offset_guard_size > 0 || matches!(plan.style, MemoryStyle::Static { .. }))
        {
            self.set_protection(initial_size_bytes..self.accessible, false)?;
            self.accessible = initial_size_bytes;
        }

        // Now that memory is sized appropriately the final operation is to
        // place the new image into linear memory. Note that this operation is
        // skipped if `self.image` matches `maybe_image`.
        assert!(initial_size_bytes <= self.accessible);
        if self.image.as_ref() != maybe_image {
            if let Some(image) = maybe_image.as_ref() {
                // The image must fit entirely within the initially-accessible
                // region of the heap.
                assert!(
                    image.linear_memory_offset.checked_add(image.len).unwrap()
                        <= initial_size_bytes
                );
                if image.len > 0 {
                    unsafe {
                        image.map_at(self.base)?;
                    }
                }
            }
            self.image = maybe_image.cloned();
        }

        // Flag ourselves as `dirty` which means that the next operation on this
        // slot is required to be `clear_and_remain_ready`.
        self.dirty = true;

        Ok(())
    }

    /// Removes the CoW image mapping, if any, from this slot by replacing its
    /// range with anonymous read/write zero memory.
    pub(crate) fn remove_image(&mut self) -> Result<()> {
        if let Some(image) = &self.image {
            unsafe {
                image.remap_as_zeros_at(self.base)?;
            }
            self.image = None;
        }
        Ok(())
    }

    /// Resets this linear memory slot back to a "pristine state".
    ///
    /// This will reset the memory back to its original contents on Linux or
    /// reset the contents back to zero on other platforms. The `keep_resident`
    /// argument is the maximum amount of memory to keep resident in this
    /// process's memory on Linux. Up to that much memory will be `memset` to
    /// zero where the rest of it will be reset or released with `madvise`.
    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    pub(crate) fn clear_and_remain_ready(&mut self, keep_resident: usize) -> Result<()> {
        assert!(self.dirty);

        unsafe {
            self.reset_all_memory_contents(keep_resident)?;
        }

        self.dirty = false;
        Ok(())
    }

    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    unsafe fn reset_all_memory_contents(&mut self, keep_resident: usize) -> Result<()> {
        if !cfg!(target_os = "linux") {
            // If we're not on Linux then there's no generic platform way to
            // reset memory back to its original state, so instead reset memory
            // back to entirely zeros with an anonymous backing.
            //
            // Additionally the previous image, if any, is dropped here
            // since it's no longer applicable to this mapping.
            return self.reset_with_anon_memory();
        }

        match &self.image {
            Some(image) => {
                assert!(self.accessible >= image.linear_memory_offset + image.len);
                if image.linear_memory_offset < keep_resident {
                    // If the image starts below the `keep_resident` then
                    // memory looks something like this:
                    //
                    //               up to `keep_resident` bytes
                    //                          |
                    //          +--------------------------+  remaining_memset
                    //          |                          | /
                    //  <--------------> <------->
                    //
                    //                              image_end
                    // 0        linear_memory_offset   |             accessible
                    // |                |              |                  |
                    // +----------------+--------------+---------+--------+
                    // |  dirty memory  |    image     |   dirty memory   |
                    // +----------------+--------------+---------+--------+
                    //
                    //  <------+-------> <-----+-----> <---+---> <--+--->
                    //         |               |           |        |
                    //         |               |           |        |
                    //   memset (1)            /           |   madvise (4)
                    //                  madvise (2)       /
                    //                                   /
                    //                            memset (3)
                    //
                    //
                    // In this situation there are two disjoint regions that are
                    // `memset` manually to zero. Note that `memset (3)` may be
                    // zero bytes large. Furthermore `madvise (4)` may also be
                    // zero bytes large.

                    let image_end = image.linear_memory_offset + image.len;
                    let mem_after_image = self.accessible - image_end;
                    let remaining_memset =
                        (keep_resident - image.linear_memory_offset).min(mem_after_image);

                    // This is memset (1)
                    std::ptr::write_bytes(self.base as *mut u8, 0u8, image.linear_memory_offset);

                    // This is madvise (2)
                    self.madvise_reset(image.linear_memory_offset, image.len)?;

                    // This is memset (3)
                    std::ptr::write_bytes(
                        (self.base + image_end) as *mut u8,
                        0u8,
                        remaining_memset,
                    );

                    // This is madvise (4)
                    self.madvise_reset(
                        image_end + remaining_memset,
                        mem_after_image - remaining_memset,
                    )?;
                } else {
                    // If the image starts after the `keep_resident` threshold
                    // then we memset the start of linear memory and then use
                    // madvise below for the rest of it, including the image.
                    //
                    // 0             keep_resident                   accessible
                    // |                |                                 |
                    // +----------------+---+----------+------------------+
                    // |  dirty memory      |  image   |   dirty memory   |
                    // +----------------+---+----------+------------------+
                    //
                    //  <------+-------> <-------------+----------------->
                    //         |                       |
                    //         |                       |
                    //   memset (1)                 madvise (2)
                    //
                    // Here only a single memset is necessary since the image
                    // started after the threshold which we're keeping resident.
                    // Note that the memset may be zero bytes here.

                    // This is memset (1)
                    std::ptr::write_bytes(self.base as *mut u8, 0u8, keep_resident);

                    // This is madvise (2)
                    self.madvise_reset(keep_resident, self.accessible - keep_resident)?;
                }
            }

            // If there's no memory image for this slot then memset the first
            // bytes in the memory back to zero while using `madvise` to purge
            // the rest.
            None => {
                let size_to_memset = keep_resident.min(self.accessible);
                std::ptr::write_bytes(self.base as *mut u8, 0u8, size_to_memset);
                self.madvise_reset(size_to_memset, self.accessible - size_to_memset)?;
            }
        }

        Ok(())
    }

    /// Resets `len` bytes starting `base` bytes into this slot back to the
    /// contents of their underlying mappings via `madvise(MADV_DONTNEED)`.
    ///
    /// Linux-only; other platforms never reach this (see
    /// `reset_all_memory_contents`).
    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    unsafe fn madvise_reset(&self, base: usize, len: usize) -> Result<()> {
        assert!(base + len <= self.accessible);
        if len == 0 {
            return Ok(());
        }
        cfg_if::cfg_if! {
            if #[cfg(target_os = "linux")] {
                rustix::mm::madvise(
                    (self.base + base) as *mut c_void,
                    len,
                    rustix::mm::Advice::LinuxDontNeed,
                )?;
                Ok(())
            } else {
                unreachable!();
            }
        }
    }

    /// Changes the protection of `range` (relative to `self.base`) to either
    /// read/write (`readwrite == true`) or inaccessible.
    fn set_protection(&self, range: Range<usize>, readwrite: bool) -> Result<()> {
        assert!(range.start <= range.end);
        assert!(range.end <= self.static_size);
        let start = self.base.checked_add(range.start).unwrap();
        if range.len() == 0 {
            return Ok(());
        }

        unsafe {
            cfg_if::cfg_if! {
                if #[cfg(unix)] {
                    let flags = if readwrite {
                        rustix::mm::MprotectFlags::READ | rustix::mm::MprotectFlags::WRITE
                    } else {
                        rustix::mm::MprotectFlags::empty()
                    };
                    rustix::mm::mprotect(start as *mut _, range.len(), flags)?;
                } else {
                    use windows_sys::Win32::System::Memory::*;

                    // On Windows pages are committed read/write or decommitted
                    // (made inaccessible) rather than `mprotect`ed.
                    let failure = if readwrite {
                        VirtualAlloc(start as _, range.len(), MEM_COMMIT, PAGE_READWRITE).is_null()
                    } else {
                        VirtualFree(start as _, range.len(), MEM_DECOMMIT) == 0
                    };
                    if failure {
                        return Err(std::io::Error::last_os_error().into());
                    }
                }
            }
        }

        Ok(())
    }

    /// Returns whether a CoW image is currently mapped into this slot.
    pub(crate) fn has_image(&self) -> bool {
        self.image.is_some()
    }

    /// Returns whether this slot has been instantiated into and not yet
    /// cleared via `clear_and_remain_ready`.
    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    pub(crate) fn is_dirty(&self) -> bool {
        self.dirty
    }

    /// Map anonymous zeroed memory across the whole slot,
    /// inaccessible. Used both during instantiate and during drop.
    fn reset_with_anon_memory(&mut self) -> Result<()> {
        if self.static_size == 0 {
            // A dummy slot manages no memory; nothing to do.
            assert!(self.image.is_none());
            assert_eq!(self.accessible, 0);
            return Ok(());
        }

        unsafe {
            cfg_if::cfg_if! {
                if #[cfg(unix)] {
                    let ptr = rustix::mm::mmap_anonymous(
                        self.base as *mut c_void,
                        self.static_size,
                        rustix::mm::ProtFlags::empty(),
                        rustix::mm::MapFlags::PRIVATE | rustix::mm::MapFlags::FIXED,
                    )?;
                    assert_eq!(ptr as usize, self.base);
                } else {
                    use windows_sys::Win32::System::Memory::*;
                    if VirtualFree(self.base as _, self.static_size, MEM_DECOMMIT) == 0 {
                        return Err(std::io::Error::last_os_error().into());
                    }
                }
            }
        }

        self.image = None;
        self.accessible = 0;

        Ok(())
    }
}
789
impl Drop for MemoryImageSlot {
    /// Restores the slot's address-space reservation to a module-agnostic
    /// state on drop, unless `clear_on_drop` has been disabled.
    fn drop(&mut self) {
        // The MemoryImageSlot may be dropped if there is an error during
        // instantiation: for example, if a memory-growth limiter
        // disallows a guest from having a memory of a certain size,
        // after we've already initialized the MemoryImageSlot.
        //
        // We need to return this region of the large pool mmap to a
        // safe state (with no module-specific mappings). The
        // MemoryImageSlot will not be returned to the MemoryPool, so a new
        // MemoryImageSlot will be created and overwrite the mappings anyway
        // on the slot's next use; but for safety and to avoid
        // resource leaks it's better not to have stale mappings to a
        // possibly-otherwise-dead module's image.
        //
        // To "wipe the slate clean", let's do a mmap of anonymous
        // memory over the whole region, with PROT_NONE. Note that we
        // *can't* simply munmap, because that leaves a hole in the
        // middle of the pooling allocator's big memory area that some
        // other random mmap may swoop in and take, to be trampled
        // over by the next MemoryImageSlot later.
        //
        // Since we're in drop(), we can't sanely return an error if
        // this mmap fails. Instead though the result is unwrapped here to
        // trigger a panic if something goes wrong. Otherwise if this
        // reset-the-mapping fails then on reuse it might be possible, depending
        // on precisely where errors happened, that stale memory could get
        // leaked through.
        //
        // The exception to all of this is if the `clear_on_drop` flag
        // (which is set by default) is false. If so, the owner of
        // this MemoryImageSlot has indicated that it will clean up in some
        // other way.
        if self.clear_on_drop {
            self.reset_with_anon_memory().unwrap();
        }
    }
}
828
829#[cfg(all(test, target_os = "linux"))]
830mod test {
831 use std::sync::Arc;
832
833 use super::{create_memfd, FdSource, MemoryImage, MemoryImageSlot, MemoryPlan, MemoryStyle};
834 use crate::mmap::Mmap;
835 use anyhow::Result;
836 use std::io::Write;
837 use wasmtime_environ::Memory;
838
    /// Builds a test `MemoryImage` from `data`, placed `offset` bytes into
    /// linear memory, backed by a fresh memfd.
    fn create_memfd_with_data(offset: usize, data: &[u8]) -> Result<MemoryImage> {
        // Offset must be page-aligned.
        let page_size = crate::page_size();
        assert_eq!(offset & (page_size - 1), 0);
        let memfd = create_memfd()?;
        memfd.as_file().write_all(data)?;

        // The image length is rounded up to the nearest page size
        let image_len = (data.len() + page_size - 1) & !(page_size - 1);
        memfd.as_file().set_len(image_len as u64)?;

        Ok(MemoryImage {
            fd: FdSource::Memfd(memfd),
            len: image_len,
            fd_offset: 0,
            linear_memory_offset: offset,
        })
    }
857
    /// Creates a minimal `MemoryPlan` with the given `style`, no guard pages,
    /// and a zero-minimum 32-bit memory — just enough for slot tests.
    fn dummy_memory_plan(style: MemoryStyle) -> MemoryPlan {
        MemoryPlan {
            style,
            memory: Memory {
                minimum: 0,
                maximum: None,
                shared: false,
                memory64: false,
            },
            pre_guard_size: 0,
            offset_guard_size: 0,
        }
    }
871
    // Exercises an image-less slot: zero-initialized memory, heap growth, and
    // reuse after `clear_and_remain_ready`.
    #[test]
    fn instantiate_no_image() {
        let plan = dummy_memory_plan(MemoryStyle::Static { bound: 4 << 30 });
        // 4 MiB mmap'd area, not accessible
        let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
        // Create a MemoryImageSlot on top of it
        let mut memfd = MemoryImageSlot::create(mmap.as_mut_ptr() as *mut _, 0, 4 << 20);
        memfd.no_clear_on_drop();
        assert!(!memfd.is_dirty());
        // instantiate with 64 KiB initial size
        memfd.instantiate(64 << 10, None, &plan).unwrap();
        assert!(memfd.is_dirty());
        // We should be able to access this 64 KiB (try both ends) and
        // it should consist of zeroes.
        let slice = mmap.as_mut_slice();
        assert_eq!(0, slice[0]);
        assert_eq!(0, slice[65535]);
        slice[1024] = 42;
        assert_eq!(42, slice[1024]);
        // grow the heap
        memfd.set_heap_limit(128 << 10).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(42, slice[1024]);
        assert_eq!(0, slice[131071]);
        // instantiate again; we should see zeroes, even as the
        // reuse-anon-mmap-opt kicks in
        memfd.clear_and_remain_ready(0).unwrap();
        assert!(!memfd.is_dirty());
        memfd.instantiate(64 << 10, None, &plan).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(0, slice[1024]);
    }
904
905 #[test]
906 fn instantiate_image() {
907 let plan = dummy_memory_plan(MemoryStyle::Static { bound: 4 << 30 });
908 // 4 MiB mmap'd area, not accessible
909 let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
910 // Create a MemoryImageSlot on top of it
911 let mut memfd = MemoryImageSlot::create(mmap.as_mut_ptr() as *mut _, 0, 4 << 20);
912 memfd.no_clear_on_drop();
913 // Create an image with some data.
914 let image = Arc::new(create_memfd_with_data(4096, &[1, 2, 3, 4]).unwrap());
915 // Instantiate with this image
916 memfd.instantiate(64 << 10, Some(&image), &plan).unwrap();
917 assert!(memfd.has_image());
918 let slice = mmap.as_mut_slice();
919 assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
920 slice[4096] = 5;
921 // Clear and re-instantiate same image
922 memfd.clear_and_remain_ready(0).unwrap();
923 memfd.instantiate(64 << 10, Some(&image), &plan).unwrap();
924 let slice = mmap.as_slice();
925 // Should not see mutation from above
926 assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
927 // Clear and re-instantiate no image
928 memfd.clear_and_remain_ready(0).unwrap();
929 memfd.instantiate(64 << 10, None, &plan).unwrap();
930 assert!(!memfd.has_image());
931 let slice = mmap.as_slice();
932 assert_eq!(&[0, 0, 0, 0], &slice[4096..4100]);
933 // Clear and re-instantiate image again
934 memfd.clear_and_remain_ready(0).unwrap();
935 memfd.instantiate(64 << 10, Some(&image), &plan).unwrap();
936 let slice = mmap.as_slice();
937 assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
938 // Create another image with different data.
939 let image2 = Arc::new(create_memfd_with_data(4096, &[10, 11, 12, 13]).unwrap());
940 memfd.clear_and_remain_ready(0).unwrap();
941 memfd.instantiate(128 << 10, Some(&image2), &plan).unwrap();
942 let slice = mmap.as_slice();
943 assert_eq!(&[10, 11, 12, 13], &slice[4096..4100]);
944 // Instantiate the original image again; we should notice it's
945 // a different image and not reuse the mappings.
946 memfd.clear_and_remain_ready(0).unwrap();
947 memfd.instantiate(64 << 10, Some(&image), &plan).unwrap();
948 let slice = mmap.as_slice();
949 assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
950 }
951
952 #[test]
953 #[cfg(target_os = "linux")]
954 fn memset_instead_of_madvise() {
955 let plan = dummy_memory_plan(MemoryStyle::Static { bound: 100 });
956 let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
957 let mut memfd = MemoryImageSlot::create(mmap.as_mut_ptr() as *mut _, 0, 4 << 20);
958 memfd.no_clear_on_drop();
959
960 // Test basics with the image
961 for image_off in [0, 4096, 8 << 10] {
962 let image = Arc::new(create_memfd_with_data(image_off, &[1, 2, 3, 4]).unwrap());
963 for amt_to_memset in [0, 4096, 10 << 12, 1 << 20, 10 << 20] {
964 memfd.instantiate(64 << 10, Some(&image), &plan).unwrap();
965 assert!(memfd.has_image());
966 let slice = mmap.as_mut_slice();
967 if image_off > 0 {
968 assert_eq!(slice[image_off - 1], 0);
969 }
970 assert_eq!(slice[image_off + 5], 0);
971 assert_eq!(&[1, 2, 3, 4], &slice[image_off..][..4]);
972 slice[image_off] = 5;
973 assert_eq!(&[5, 2, 3, 4], &slice[image_off..][..4]);
974 memfd.clear_and_remain_ready(amt_to_memset).unwrap();
975 }
976 }
977
978 // Test without an image
979 for amt_to_memset in [0, 4096, 10 << 12, 1 << 20, 10 << 20] {
980 memfd.instantiate(64 << 10, None, &plan).unwrap();
981 for chunk in mmap.as_mut_slice()[..64 << 10].chunks_mut(1024) {
982 assert_eq!(chunk[0], 0);
983 chunk[0] = 5;
984 }
985 memfd.clear_and_remain_ready(amt_to_memset).unwrap();
986 }
987 }
988
989 #[test]
990 #[cfg(target_os = "linux")]
991 fn dynamic() {
992 let plan = dummy_memory_plan(MemoryStyle::Dynamic { reserve: 200 });
993
994 let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
995 let mut memfd = MemoryImageSlot::create(mmap.as_mut_ptr() as *mut _, 0, 4 << 20);
996 memfd.no_clear_on_drop();
997 let image = Arc::new(create_memfd_with_data(4096, &[1, 2, 3, 4]).unwrap());
998 let initial = 64 << 10;
999
1000 // Instantiate the image and test that memory remains accessible after
1001 // it's cleared.
1002 memfd.instantiate(initial, Some(&image), &plan).unwrap();
1003 assert!(memfd.has_image());
1004 let slice = mmap.as_mut_slice();
1005 assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
1006 slice[4096] = 5;
1007 assert_eq!(&[5, 2, 3, 4], &slice[4096..4100]);
1008 memfd.clear_and_remain_ready(0).unwrap();
1009 assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
1010
1011 // Re-instantiate make sure it preserves memory. Grow a bit and set data
1012 // beyond the initial size.
1013 memfd.instantiate(initial, Some(&image), &plan).unwrap();
1014 assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
1015 memfd.set_heap_limit(initial * 2).unwrap();
1016 assert_eq!(&[0, 0], &slice[initial..initial + 2]);
1017 slice[initial] = 100;
1018 assert_eq!(&[100, 0], &slice[initial..initial + 2]);
1019 memfd.clear_and_remain_ready(0).unwrap();
1020
1021 // Test that memory is still accessible, but it's been reset
1022 assert_eq!(&[0, 0], &slice[initial..initial + 2]);
1023
1024 // Instantiate again, and again memory beyond the initial size should
1025 // still be accessible. Grow into it again and make sure it works.
1026 memfd.instantiate(initial, Some(&image), &plan).unwrap();
1027 assert_eq!(&[0, 0], &slice[initial..initial + 2]);
1028 memfd.set_heap_limit(initial * 2).unwrap();
1029 assert_eq!(&[0, 0], &slice[initial..initial + 2]);
1030 slice[initial] = 100;
1031 assert_eq!(&[100, 0], &slice[initial..initial + 2]);
1032 memfd.clear_and_remain_ready(0).unwrap();
1033
1034 // Reset the image to none and double-check everything is back to zero
1035 memfd.instantiate(64 << 10, None, &plan).unwrap();
1036 assert!(!memfd.has_image());
1037 assert_eq!(&[0, 0, 0, 0], &slice[4096..4100]);
1038 assert_eq!(&[0, 0], &slice[initial..initial + 2]);
1039 }
1040}