polkadot_node_core_pvf/artifacts.rs
1// Copyright (C) Parity Technologies (UK) Ltd.
2// This file is part of Polkadot.
3
4// Polkadot is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// Polkadot is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
16
17//! PVF artifacts (final compiled code blobs).
18//!
19//! # Lifecycle of an artifact
20//!
21//! 1. During node start-up, we prune all the cached artifacts, if any.
22//!
23//! 2. In order to be executed, a PVF should be prepared first. This means that artifacts should
24//!    have an [`ArtifactState::Prepared`] entry for that artifact in the table. If not, the
25//!    preparation process kicks in. The execution request is stashed until after the preparation is
26//!    done, and the artifact state in the host is set to [`ArtifactState::Preparing`]. Preparation
27//!    goes through the preparation queue and the pool.
28//!
29//!    1. If the artifact is already being processed, we add another execution request to the
30//!       existing preparation job, without starting a new one.
31//!
32//!    2. Note that if the state is [`ArtifactState::FailedToProcess`], we usually do not retry
33//!       preparation, though we may under certain conditions.
34//!
35//! 3. The pool gets an available worker and instructs it to work on the given PVF. The worker
36//!    starts compilation. When the worker finishes successfully, it writes the serialized artifact
37//!    into a temporary file and notifies the host that it's done. The host atomically moves
38//!    (renames) the temporary file to the destination filename of the artifact.
39//!
40//! 4. If the worker concluded successfully or returned an error, then the pool notifies the queue.
41//!    In both cases, the queue reports to the host that the result is ready.
42//!
43//! 5. The host will react by changing the artifact state to either [`ArtifactState::Prepared`] or
44//!    [`ArtifactState::FailedToProcess`] for the PVF in question. On success, the
45//!    `last_time_needed` will be set to the current time. It will also dispatch the pending
46//!    execution requests.
47//!
48//! 6. On success, the execution request will come through the execution queue and ultimately be
49//!    processed by an execution worker. When this worker receives the request, it will read the
50//!    requested artifact. If it doesn't exist it reports an internal error. A request for execution
51//!    will bump the `last_time_needed` to the current time.
52//!
53//! 7. There is a separate process for pruning the prepared artifacts whose `last_time_needed` is
54//!    older by a predefined parameter. This process is run very rarely (say, once a day). Once the
//!    artifact has expired, it is removed from disk eagerly and atomically.
56
57use crate::{host::PrecheckResultSender, worker_interface::WORKER_DIR_PREFIX};
58use always_assert::always;
59use polkadot_node_core_pvf_common::{error::PrepareError, pvf::PvfPrepData, ArtifactChecksum};
60use polkadot_parachain_primitives::primitives::ValidationCodeHash;
61use polkadot_primitives::ExecutorParamsPrepHash;
62use std::{
63	collections::HashMap,
64	fs,
65	path::{Path, PathBuf},
66	time::{Duration, SystemTime},
67};
68
/// The extension to use for cached artifacts.
const ARTIFACT_EXTENSION: &str = "pvf";

/// The prefix that artifacts used to start with under the old naming scheme.
const ARTIFACT_OLD_PREFIX: &str = "wasmtime_";
74
75pub fn generate_artifact_path(cache_path: &Path) -> PathBuf {
76	let file_name = {
77		use array_bytes::Hex;
78		use rand::RngCore;
79		let mut bytes = [0u8; 64];
80		rand::thread_rng().fill_bytes(&mut bytes);
81		bytes.hex("0x")
82	};
83	let mut artifact_path = cache_path.join(file_name);
84	artifact_path.set_extension(ARTIFACT_EXTENSION);
85	artifact_path
86}
87
/// Identifier of an artifact. Encodes a code hash of the PVF and a hash of preparation-related
///  executor parameter set.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ArtifactId {
	/// Hash of the validation code the artifact was compiled from.
	pub(crate) code_hash: ValidationCodeHash,
	/// Hash of the preparation-relevant subset of the executor parameters.
	pub(crate) executor_params_prep_hash: ExecutorParamsPrepHash,
}
95
96impl ArtifactId {
97	/// Creates a new artifact ID with the given hash.
98	pub fn new(
99		code_hash: ValidationCodeHash,
100		executor_params_prep_hash: ExecutorParamsPrepHash,
101	) -> Self {
102		Self { code_hash, executor_params_prep_hash }
103	}
104
105	/// Returns an artifact ID that corresponds to the PVF with given preparation-related
106	/// executor parameters.
107	pub fn from_pvf_prep_data(pvf: &PvfPrepData) -> Self {
108		Self::new(pvf.code_hash(), pvf.executor_params().prep_hash())
109	}
110}
111
/// A bundle of the artifact ID and the path.
///
/// Rationale for having this is two-fold:
///
/// - While we can derive the artifact path from the artifact id, it makes sense to carry it around
/// sometimes to avoid extra work.
/// - At the same time, carrying only the path limits the ability to log.
#[derive(Debug, Clone)]
pub struct ArtifactPathId {
	/// The artifact's identifier.
	pub(crate) id: ArtifactId,
	/// Location of the compiled artifact on disk.
	pub(crate) path: PathBuf,
	/// The checksum of the compiled artifact.
	pub(crate) checksum: ArtifactChecksum,
}
125
126impl ArtifactPathId {
127	pub(crate) fn new(artifact_id: ArtifactId, path: &Path, checksum: ArtifactChecksum) -> Self {
128		Self { id: artifact_id, path: path.to_owned(), checksum }
129	}
130}
131
/// The lifecycle state of a single artifact, as tracked in the host's table.
#[derive(Debug)]
pub enum ArtifactState {
	/// The artifact is ready to be used by the executor.
	///
	/// That means that the artifact should be accessible through the path obtained by the artifact
	/// id (unless, it was removed externally).
	Prepared {
		/// The checksum of the compiled artifact.
		checksum: ArtifactChecksum,
		/// The path of the compiled artifact.
		path: PathBuf,
		/// The time when the artifact was last needed.
		///
		/// This is updated when we get the heads up for this artifact or when we just discover
		/// this file.
		last_time_needed: SystemTime,
		/// Size in bytes
		size: u64,
	},
	/// A task to prepare this artifact is scheduled.
	Preparing {
		/// List of result senders that are waiting for a response.
		waiting_for_response: Vec<PrecheckResultSender>,
		/// The number of times this artifact has failed to prepare.
		num_failures: u32,
	},
	/// The code couldn't be compiled due to an error. Such artifacts
	/// never reach the executor and stay in the host's memory.
	FailedToProcess {
		/// Keep track of the last time that processing this artifact failed.
		last_time_failed: SystemTime,
		/// The number of times this artifact has failed to prepare.
		num_failures: u32,
		/// The last error encountered for preparation.
		error: PrepareError,
	},
}
169
/// A container of all known artifact ids and their states.
pub struct Artifacts {
	// Maps each artifact id to its current lifecycle state.
	inner: HashMap<ArtifactId, ArtifactState>,
}
174
/// Parameters we use to cleanup artifacts
/// After we hit the cache limit we remove the least used artifacts
/// but only if they are stale more than minimum stale time
#[derive(Debug)]
pub struct ArtifactsCleanupConfig {
	// Max size in bytes. When reached, the least recently used artifacts are deleted
	cache_limit: u64,
	// Inactive time after which an artifact is allowed to be deleted
	min_stale_time: Duration,
}
185
186impl Default for ArtifactsCleanupConfig {
187	fn default() -> Self {
188		Self {
189			cache_limit: 10 * 1024 * 1024 * 1024,              // 10 GiB
190			min_stale_time: Duration::from_secs(24 * 60 * 60), // 24 hours
191		}
192	}
193}
194
#[cfg(test)]
impl ArtifactsCleanupConfig {
	/// Test-only constructor with an explicit cache limit and minimum stale time.
	pub fn new(cache_limit: u64, min_stale_time: Duration) -> Self {
		Self { cache_limit, min_stale_time }
	}
}
201
impl Artifacts {
	/// Creates an empty in-memory table without touching the disk.
	#[cfg(test)]
	pub(crate) fn empty() -> Self {
		Self { inner: HashMap::new() }
	}

	/// Returns the number of artifacts currently tracked in the table.
	#[cfg(test)]
	fn len(&self) -> usize {
		self.inner.len()
	}

	/// Returns the ids of all artifacts currently tracked in the table.
	#[cfg(test)]
	fn artifact_ids(&self) -> Vec<ArtifactId> {
		self.inner.keys().cloned().collect()
	}

	/// Replaces `checksum` with `new_checksum` in every `Prepared` entry that currently
	/// carries `checksum`. Entries in other states are left untouched.
	#[cfg(feature = "test-utils")]
	pub fn replace_artifact_checksum(
		&mut self,
		checksum: ArtifactChecksum,
		new_checksum: ArtifactChecksum,
	) {
		for artifact in self.inner.values_mut() {
			if let ArtifactState::Prepared { checksum: c, .. } = artifact {
				if *c == checksum {
					*c = new_checksum;
				}
			}
		}
	}

	/// Create an empty table and the cache directory on-disk if it doesn't exist.
	pub async fn new(cache_path: &Path) -> Self {
		// Make sure that the cache path directory and all its parents are created.
		let _ = tokio::fs::create_dir_all(cache_path).await;

		// Delete any leftover artifacts and worker dirs from previous runs. We don't delete the
		// entire cache directory in case the user made a mistake and set it to e.g. their home
		// directory. This is a best-effort to do clean-up, so ignore any errors.
		for entry in fs::read_dir(cache_path).into_iter().flatten().flatten() {
			let path = entry.path();
			// Entries whose names are not valid UTF-8 can't match our patterns; skip them.
			let Some(file_name) = path.file_name().and_then(|f| f.to_str()) else { continue };
			if path.is_dir() && file_name.starts_with(WORKER_DIR_PREFIX) {
				let _ = fs::remove_dir_all(path);
			} else if path.extension().map_or(false, |ext| ext == ARTIFACT_EXTENSION) ||
				file_name.starts_with(ARTIFACT_OLD_PREFIX)
			{
				// Covers both the current naming scheme (`.pvf` extension) and the old
				// `wasmtime_`-prefixed one.
				let _ = fs::remove_file(path);
			}
		}

		Self { inner: HashMap::new() }
	}

	/// Returns the state of the given artifact by its ID.
	pub fn artifact_state_mut(&mut self, artifact_id: &ArtifactId) -> Option<&mut ArtifactState> {
		self.inner.get_mut(artifact_id)
	}

	/// Inform the table about the artifact with the given ID. The state will be set to "preparing".
	///
	/// This function must be used only for brand-new artifacts and should never be used for
	/// replacing existing ones.
	pub fn insert_preparing(
		&mut self,
		artifact_id: ArtifactId,
		waiting_for_response: Vec<PrecheckResultSender>,
	) {
		// See the precondition.
		always!(self
			.inner
			.insert(artifact_id, ArtifactState::Preparing { waiting_for_response, num_failures: 0 })
			.is_none())
	}

	/// Insert an artifact with the given ID as "prepared".
	///
	/// This function should only be used to build the artifact table at startup with valid
	/// artifact caches.
	#[cfg(test)]
	pub(crate) fn insert_prepared(
		&mut self,
		artifact_id: ArtifactId,
		path: PathBuf,
		checksum: ArtifactChecksum,
		last_time_needed: SystemTime,
		size: u64,
	) {
		// See the precondition.
		always!(self
			.inner
			.insert(artifact_id, ArtifactState::Prepared { path, checksum, last_time_needed, size })
			.is_none())
	}

	/// Remove artifact by its id.
	///
	/// Note that the table entry is removed regardless of its state, but the id/path pair is
	/// only returned for a `Prepared` entry — other states have no on-disk file to clean up.
	pub fn remove(&mut self, artifact_id: ArtifactId) -> Option<(ArtifactId, PathBuf)> {
		self.inner.remove(&artifact_id).and_then(|state| match state {
			ArtifactState::Prepared { path, .. } => Some((artifact_id, path)),
			_ => None,
		})
	}

	/// Remove artifacts older than the given TTL when the total artifact size reaches the limit
	/// and return id and path of the removed ones
	pub fn prune(&mut self, cleanup_config: &ArtifactsCleanupConfig) -> Vec<(ArtifactId, PathBuf)> {
		let mut to_remove = vec![];
		let now = SystemTime::now();

		let mut total_size = 0;
		let mut artifact_sizes = vec![];

		// Only `Prepared` artifacts occupy disk space; collect their sizes and usage times.
		for (k, v) in self.inner.iter() {
			if let ArtifactState::Prepared { ref path, last_time_needed, size, .. } = *v {
				total_size += size;
				artifact_sizes.push((k.clone(), path.clone(), size, last_time_needed));
			}
		}
		// Most recently used first, so `pop()` below yields the least recently used artifact.
		artifact_sizes
			.sort_by_key(|&(_, _, _, last_time_needed)| std::cmp::Reverse(last_time_needed));

		// Evict LRU artifacts until we fit under the cache limit, but never evict an artifact
		// that was used within `min_stale_time`.
		while total_size > cleanup_config.cache_limit {
			let Some((artifact_id, path, size, last_time_needed)) = artifact_sizes.pop() else {
				break
			};

			// `duration_since` errors when `last_time_needed` is in the future (clock skew);
			// treat that conservatively as "recently used".
			let used_recently = now
				.duration_since(last_time_needed)
				.map(|stale_time| stale_time < cleanup_config.min_stale_time)
				.unwrap_or(true);
			if used_recently {
				// The list is sorted by recency, so all remaining artifacts are at least as
				// fresh — no point continuing.
				break;
			}

			self.inner.remove(&artifact_id);
			to_remove.push((artifact_id, path));
			total_size -= size;
		}

		to_remove
	}
}
344
#[cfg(test)]
mod tests {
	use crate::testing::artifact_id;

	use super::*;

	#[tokio::test]
	async fn cache_cleared_on_startup() {
		let tempdir = tempfile::tempdir().unwrap();
		let cache_path = tempdir.path();

		// These should be cleared.
		fs::write(cache_path.join("abcd.pvf"), "test").unwrap();
		fs::write(cache_path.join("wasmtime_..."), "test").unwrap();
		fs::create_dir(cache_path.join("worker-dir-prepare-test")).unwrap();

		// These should not be touched.
		fs::write(cache_path.join("abcd.pvfartifact"), "test").unwrap();
		fs::write(cache_path.join("polkadot_..."), "test").unwrap();
		fs::create_dir(cache_path.join("worker-prepare-test")).unwrap();

		let artifacts = Artifacts::new(cache_path).await;

		let entries: Vec<String> = fs::read_dir(&cache_path)
			.unwrap()
			.map(|entry| entry.unwrap().file_name().into_string().unwrap())
			.collect();
		// Only the three non-matching entries survive the startup sweep.
		assert_eq!(entries.len(), 3);
		assert!(entries.contains(&String::from("abcd.pvfartifact")));
		assert!(entries.contains(&String::from("polkadot_...")));
		assert!(entries.contains(&String::from("worker-prepare-test")));
		// The startup sweep never populates the in-memory table.
		assert_eq!(artifacts.len(), 0);
	}

	#[tokio::test]
	async fn test_pruned_by_cache_size() {
		let mock_now = SystemTime::now();
		let tempdir = tempfile::tempdir().unwrap();
		let cache_path = tempdir.path();

		let path1 = generate_artifact_path(cache_path);
		let path2 = generate_artifact_path(cache_path);
		let path3 = generate_artifact_path(cache_path);
		let artifact_id1 = artifact_id(1);
		let artifact_id2 = artifact_id(2);
		let artifact_id3 = artifact_id(3);

		let mut artifacts = Artifacts::new(cache_path).await;
		// Zero stale time: any artifact may be evicted once the 1500-byte limit is exceeded.
		let cleanup_config = ArtifactsCleanupConfig::new(1500, Duration::from_secs(0));

		// Three 1024-byte artifacts (3072 total) against a 1500-byte limit: pruning must
		// evict the two least recently used (ids 3 and 2) to get under the limit.
		artifacts.insert_prepared(
			artifact_id1.clone(),
			path1.clone(),
			Default::default(),
			mock_now - Duration::from_secs(5),
			1024,
		);
		artifacts.insert_prepared(
			artifact_id2.clone(),
			path2.clone(),
			Default::default(),
			mock_now - Duration::from_secs(10),
			1024,
		);
		artifacts.insert_prepared(
			artifact_id3.clone(),
			path3.clone(),
			Default::default(),
			mock_now - Duration::from_secs(15),
			1024,
		);

		let pruned = artifacts.prune(&cleanup_config);

		// The most recently used artifact survives; the other two are evicted.
		assert!(artifacts.artifact_ids().contains(&artifact_id1));
		assert!(!pruned.contains(&(artifact_id1, path1)));
		assert!(!artifacts.artifact_ids().contains(&artifact_id2));
		assert!(pruned.contains(&(artifact_id2, path2)));
		assert!(!artifacts.artifact_ids().contains(&artifact_id3));
		assert!(pruned.contains(&(artifact_id3, path3)));
	}

	#[tokio::test]
	async fn test_did_not_prune_by_cache_size_because_of_stale_time() {
		let mock_now = SystemTime::now();
		let tempdir = tempfile::tempdir().unwrap();
		let cache_path = tempdir.path();

		let path1 = generate_artifact_path(cache_path);
		let path2 = generate_artifact_path(cache_path);
		let path3 = generate_artifact_path(cache_path);
		let artifact_id1 = artifact_id(1);
		let artifact_id2 = artifact_id(2);
		let artifact_id3 = artifact_id(3);

		let mut artifacts = Artifacts::new(cache_path).await;
		// 12-second stale time: only artifacts idle for >= 12s may be evicted, even though
		// the cache stays over the 1500-byte limit.
		let cleanup_config = ArtifactsCleanupConfig::new(1500, Duration::from_secs(12));

		artifacts.insert_prepared(
			artifact_id1.clone(),
			path1.clone(),
			Default::default(),
			mock_now - Duration::from_secs(5),
			1024,
		);
		artifacts.insert_prepared(
			artifact_id2.clone(),
			path2.clone(),
			Default::default(),
			mock_now - Duration::from_secs(10),
			1024,
		);
		artifacts.insert_prepared(
			artifact_id3.clone(),
			path3.clone(),
			Default::default(),
			mock_now - Duration::from_secs(15),
			1024,
		);

		let pruned = artifacts.prune(&cleanup_config);

		// Only id3 (idle 15s >= 12s) is evictable; ids 1 and 2 are too fresh, so pruning
		// stops even though the cache is still above the limit.
		assert!(artifacts.artifact_ids().contains(&artifact_id1));
		assert!(!pruned.contains(&(artifact_id1, path1)));
		assert!(artifacts.artifact_ids().contains(&artifact_id2));
		assert!(!pruned.contains(&(artifact_id2, path2)));
		assert!(!artifacts.artifact_ids().contains(&artifact_id3));
		assert!(pruned.contains(&(artifact_id3, path3)));
	}
}
474}