polkadot_node_core_pvf/error.rs
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
16
17use polkadot_node_core_pvf_common::error::{InternalValidationError, PrepareError};
18
19/// A error raised during validation of the candidate.
20#[derive(thiserror::Error, Debug, Clone)]
21pub enum ValidationError {
22 /// Deterministic preparation issue. In practice, most of the problems should be caught by
23 /// prechecking, so this may be a sign of internal conditions.
24 ///
25 /// In principle if preparation of the `WASM` fails, the current candidate cannot be the
26 /// reason for that. So we can't say whether it is invalid or not. In addition, with
27 /// pre-checking enabled only valid runtimes should ever get enacted, so we can be
28 /// reasonably sure that this is some local problem on the current node. However, as this
29 /// particular error *seems* to indicate a deterministic error, we raise a warning.
30 #[error("candidate validation: {0}")]
31 Preparation(PrepareError),
32 /// The error was raised because the candidate is invalid. Should vote against.
33 #[error("candidate validation: {0}")]
34 Invalid(#[from] InvalidCandidate),
35 /// Possibly transient issue that may resolve after retries. Should vote against when retries
36 /// fail.
37 #[error("candidate validation: {0}")]
38 PossiblyInvalid(#[from] PossiblyInvalidError),
39 /// Preparation or execution issue caused by an internal condition. Should not vote against.
40 #[error("candidate validation: internal: {0}")]
41 Internal(#[from] InternalValidationError),
42 /// The execution deadline of allowed_ancestry_len + 1 has been reached. Jobs like backing have
43 /// a limited time to execute. Once the deadline is reached, the current candidate cannot be
44 /// backed, regardless of its validity.
45 #[error("candidate validation: execution deadline has been reached.")]
46 ExecutionDeadline,
47}
48
49/// A description of an error raised during executing a PVF and can be attributed to the combination
50/// of the candidate [`polkadot_parachain_primitives::primitives::ValidationParams`] and the PVF.
51#[derive(thiserror::Error, Debug, Clone)]
52pub enum InvalidCandidate {
53 /// The candidate is reported to be invalid by the execution worker. The string contains the
54 /// error message.
55 #[error("invalid: worker reported: {0}")]
56 WorkerReportedInvalid(String),
57 /// PVF execution (compilation is not included) took more time than was allotted.
58 #[error("invalid: hard timeout")]
59 HardTimeout,
60 /// Proof-of-validity failed to decompress correctly
61 #[error("invalid: PoV failed to decompress")]
62 PoVDecompressionFailure,
63}
64
65/// Possibly transient issue that may resolve after retries.
66#[derive(thiserror::Error, Debug, Clone)]
67pub enum PossiblyInvalidError {
68 /// The worker process (not the job) has died during validation of a candidate.
69 ///
70 /// It's unlikely that this is caused by malicious code since workers spawn separate job
71 /// processes, and those job processes are sandboxed. But, it is possible. We retry in this
72 /// case, and if the error persists, we assume it's caused by the candidate and vote against.
73 #[error("possibly invalid: ambiguous worker death")]
74 AmbiguousWorkerDeath,
75 /// The job process (not the worker) has died for one of the following reasons:
76 ///
77 /// (a) A seccomp violation occurred, most likely due to an attempt by malicious code to
78 /// execute arbitrary code. Note that there is no foolproof way to detect this if the operator
79 /// has seccomp auditing disabled.
80 ///
81 /// (b) The host machine ran out of free memory and the OOM killer started killing the
82 /// processes, and in order to save the parent it will "sacrifice child" first.
83 ///
84 /// (c) Some other reason, perhaps transient or perhaps caused by malicious code.
85 ///
86 /// We cannot treat this as an internal error because malicious code may have caused this.
87 #[error("possibly invalid: ambiguous job death: {0}")]
88 AmbiguousJobDeath(String),
89 /// An unexpected error occurred in the job process and we can't be sure whether the candidate
90 /// is really invalid or some internal glitch occurred. Whenever we are unsure, we can never
91 /// treat an error as internal as we would abstain from voting. This is bad because if the
92 /// issue was due to the candidate, then all validators would abstain, stalling finality on the
93 /// chain. So we will first retry the candidate, and if the issue persists we are forced to
94 /// vote invalid.
95 #[error("possibly invalid: job error: {0}")]
96 JobError(String),
97 /// Instantiation of the WASM module instance failed during an execution.
98 /// Possibly related to local issues or dirty node update. May be retried with re-preparation.
99 #[error("possibly invalid: runtime construction: {0}")]
100 RuntimeConstruction(String),
101 /// The artifact is corrupted, re-prepare the artifact and try again.
102 #[error("possibly invalid: artifact is corrupted")]
103 CorruptedArtifact,
104}
105
106impl From<PrepareError> for ValidationError {
107 fn from(error: PrepareError) -> Self {
108 // Here we need to classify the errors into two errors: deterministic and non-deterministic.
109 // See [`PrepareError::is_deterministic`].
110 if error.is_deterministic() {
111 Self::Preparation(error)
112 } else {
113 Self::Internal(InternalValidationError::NonDeterministicPrepareError(error))
114 }
115 }
116}