polkadot_node_core_pvf_common/execute.rs
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

16
17use crate::{error::InternalValidationError, ArtifactChecksum};
18use codec::{Decode, Encode};
19use polkadot_node_primitives::PoV;
20use polkadot_parachain_primitives::primitives::ValidationResult;
21use polkadot_primitives::{ExecutorParams, PersistedValidationData};
22use std::time::Duration;
23
24/// The payload of the one-time handshake that is done when a worker process is created. Carries
25/// data from the host to the worker.
26#[derive(Encode, Decode)]
27pub struct Handshake {
28 /// The executor parameters.
29 pub executor_params: ExecutorParams,
30}
31
32/// A request to execute a PVF
33#[derive(Encode, Decode)]
34pub struct ExecuteRequest {
35 /// Persisted validation data.
36 pub pvd: PersistedValidationData,
37 /// Proof-of-validity.
38 pub pov: PoV,
39 /// Execution timeout.
40 pub execution_timeout: Duration,
41 /// Checksum of the artifact to execute.
42 pub artifact_checksum: ArtifactChecksum,
43}
44
45/// The response from the execution worker.
46#[derive(Debug, Encode, Decode)]
47pub struct WorkerResponse {
48 /// The response from the execute job process.
49 pub job_response: JobResponse,
50 /// The amount of CPU time taken by the job.
51 pub duration: Duration,
52 /// The uncompressed PoV size.
53 pub pov_size: u32,
54}
55
56/// An error occurred in the worker process.
57#[derive(thiserror::Error, Debug, Clone, Encode, Decode)]
58pub enum WorkerError {
59 /// The job timed out.
60 #[error("The job timed out")]
61 JobTimedOut,
62 /// The job process has died. We must kill the worker just in case.
63 ///
64 /// We cannot treat this as an internal error because malicious code may have killed the job.
65 /// We still retry it, because in the non-malicious case it is likely spurious.
66 #[error("The job process (pid {job_pid}) has died: {err}")]
67 JobDied { err: String, job_pid: i32 },
68 /// An unexpected error occurred in the job process, e.g. failing to spawn a thread, panic,
69 /// etc.
70 ///
71 /// Because malicious code can cause a job error, we must not treat it as an internal error. We
72 /// still retry it, because in the non-malicious case it is likely spurious.
73 #[error("An unexpected error occurred in the job process: {0}")]
74 JobError(#[from] JobError),
75
76 /// Some internal error occurred.
77 #[error("An internal error occurred: {0}")]
78 InternalError(#[from] InternalValidationError),
79}
80
81/// The result of a job on the execution worker.
82pub type JobResult = Result<JobResponse, JobError>;
83
84/// The successful response from a job on the execution worker.
85#[derive(Debug, Encode, Decode)]
86pub enum JobResponse {
87 Ok {
88 /// The result of parachain validation.
89 result_descriptor: ValidationResult,
90 },
91 /// A possibly transient runtime instantiation error happened during the execution; may be
92 /// retried with re-preparation
93 RuntimeConstruction(String),
94 /// The candidate is invalid.
95 InvalidCandidate(String),
96 /// PoV decompression failed
97 PoVDecompressionFailure,
98 /// The artifact is corrupted, re-prepare the artifact and try again.
99 CorruptedArtifact,
100}
101
102impl JobResponse {
103 /// Creates an invalid response from a context `ctx` and a message `msg` (which can be empty).
104 pub fn format_invalid(ctx: &'static str, msg: &str) -> Self {
105 if msg.is_empty() {
106 Self::InvalidCandidate(ctx.to_string())
107 } else {
108 Self::InvalidCandidate(format!("{}: {}", ctx, msg))
109 }
110 }
111
112 /// Creates a may retry response from a context `ctx` and a message `msg` (which can be empty).
113 pub fn runtime_construction(ctx: &'static str, msg: &str) -> Self {
114 if msg.is_empty() {
115 Self::RuntimeConstruction(ctx.to_string())
116 } else {
117 Self::RuntimeConstruction(format!("{}: {}", ctx, msg))
118 }
119 }
120}
121
122/// An unexpected error occurred in the execution job process. Because this comes from the job,
123/// which executes untrusted code, this error must likewise be treated as untrusted. That is, we
124/// cannot raise an internal error based on this.
125#[derive(thiserror::Error, Clone, Debug, Encode, Decode)]
126pub enum JobError {
127 #[error("The job timed out")]
128 TimedOut,
129 #[error("An unexpected panic has occurred in the execution job: {0}")]
130 Panic(String),
131 /// Some error occurred when interfacing with the kernel.
132 #[error("Error interfacing with the kernel: {0}")]
133 Kernel(String),
134 #[error("Could not spawn the requested thread: {0}")]
135 CouldNotSpawnThread(String),
136 #[error("An error occurred in the CPU time monitor thread: {0}")]
137 CpuTimeMonitorThread(String),
138 /// Since the job can return any exit status it wants, we have to treat this as untrusted.
139 #[error("Unexpected exit status: {0}")]
140 UnexpectedExitStatus(i32),
141}