referrerpolicy=no-referrer-when-downgrade

polkadot_service/
relay_chain_selection.rs

1// Copyright (C) Parity Technologies (UK) Ltd.
2// This file is part of Polkadot.
3
4// Polkadot is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// Polkadot is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
16
17//! A [`SelectChain`] implementation designed for relay chains.
18//!
19//! This uses information about parachains to inform GRANDPA and BABE
20//! about blocks which are safe to build on and blocks which are safe to
21//! finalize.
22//!
23//! To learn more about chain-selection rules for Relay Chains, please see the
24//! documentation on [chain-selection][chain-selection-guide]
25//! in the implementers' guide.
26//!
27//! This is mostly a wrapper around a subsystem which implements the
28//! chain-selection rule, which leaves the code to be very simple.
29//!
30//! However, this does apply the further finality constraints to the best
31//! leaf returned from the chain selection subsystem by calling into other
32//! subsystems which yield information about approvals and disputes.
33//!
34//! [chain-selection-guide]: https://paritytech.github.io/polkadot-sdk/book/protocol-chain-selection.html
35
36#![cfg(feature = "full-node")]
37
38use super::{HeaderProvider, HeaderProviderProvider};
39use futures::channel::oneshot;
40use polkadot_node_primitives::MAX_FINALITY_LAG as PRIMITIVES_MAX_FINALITY_LAG;
41use polkadot_node_subsystem::messages::{
42	ApprovalVotingParallelMessage, ChainSelectionMessage, DisputeCoordinatorMessage,
43	HighestApprovedAncestorBlock,
44};
45use polkadot_node_subsystem_util::metrics::{self, prometheus};
46use polkadot_overseer::{AllMessages, Handle, PriorityLevel};
47use polkadot_primitives::{Block as PolkadotBlock, BlockNumber, Hash, Header as PolkadotHeader};
48use sp_consensus::{Error as ConsensusError, SelectChain};
49use std::sync::Arc;
50
51pub use sc_service::SpawnTaskHandle;
52
53/// The maximum amount of unfinalized blocks we are willing to allow due to approval checking
54/// or disputes.
55///
56/// This is a safety net that should be removed at some point in the future.
57// In sync with `MAX_HEADS_LOOK_BACK` in `approval-voting`
58// and `MAX_BATCH_SCRAPE_ANCESTORS` in `dispute-coordinator`.
59const MAX_FINALITY_LAG: polkadot_primitives::BlockNumber = PRIMITIVES_MAX_FINALITY_LAG;
60
61const LOG_TARGET: &str = "parachain::chain-selection";
62
63/// Prometheus metrics for chain-selection.
64#[derive(Debug, Default, Clone)]
65pub struct Metrics(Option<MetricsInner>);
66
67#[derive(Debug, Clone)]
68struct MetricsInner {
69	approval_checking_finality_lag: prometheus::Gauge<prometheus::U64>,
70	disputes_finality_lag: prometheus::Gauge<prometheus::U64>,
71}
72
73impl metrics::Metrics for Metrics {
74	fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError> {
75		let metrics = MetricsInner {
76			approval_checking_finality_lag: prometheus::register(
77				prometheus::Gauge::with_opts(
78					prometheus::Opts::new(
79						"polkadot_parachain_approval_checking_finality_lag",
80						"How far behind the head of the chain the Approval Checking protocol wants to vote",
81					)
82				)?,
83				registry,
84			)?,
85			disputes_finality_lag: prometheus::register(
86				prometheus::Gauge::with_opts(
87					prometheus::Opts::new(
88						"polkadot_parachain_disputes_finality_lag",
89						"How far behind the head of the chain the Disputes protocol wants to vote",
90					)
91				)?,
92				registry,
93			)?,
94		};
95
96		Ok(Metrics(Some(metrics)))
97	}
98}
99
100impl Metrics {
101	fn note_approval_checking_finality_lag(&self, lag: BlockNumber) {
102		if let Some(ref metrics) = self.0 {
103			metrics.approval_checking_finality_lag.set(lag as _);
104		}
105	}
106
107	fn note_disputes_finality_lag(&self, lag: BlockNumber) {
108		if let Some(ref metrics) = self.0 {
109			metrics.disputes_finality_lag.set(lag as _);
110		}
111	}
112}
113
114/// Determines whether the chain is a relay chain
115/// and hence has to take approval votes and disputes
116/// into account.
117enum IsDisputesAwareWithOverseer<B: sc_client_api::Backend<PolkadotBlock>> {
118	Yes(SelectRelayChainInner<B, Handle>),
119	No,
120}
121
122impl<B> Clone for IsDisputesAwareWithOverseer<B>
123where
124	B: sc_client_api::Backend<PolkadotBlock>,
125	SelectRelayChainInner<B, Handle>: Clone,
126{
127	fn clone(&self) -> Self {
128		match self {
129			Self::Yes(ref inner) => Self::Yes(inner.clone()),
130			Self::No => Self::No,
131		}
132	}
133}
134
135/// A chain-selection implementation which provides safety for relay chains.
136pub struct SelectRelayChain<B: sc_client_api::Backend<PolkadotBlock>> {
137	longest_chain: sc_consensus::LongestChain<B, PolkadotBlock>,
138	selection: IsDisputesAwareWithOverseer<B>,
139}
140
141impl<B> Clone for SelectRelayChain<B>
142where
143	B: sc_client_api::Backend<PolkadotBlock>,
144	SelectRelayChainInner<B, Handle>: Clone,
145{
146	fn clone(&self) -> Self {
147		Self { longest_chain: self.longest_chain.clone(), selection: self.selection.clone() }
148	}
149}
150
151impl<B> SelectRelayChain<B>
152where
153	B: sc_client_api::Backend<PolkadotBlock> + 'static,
154{
155	/// Use the plain longest chain algorithm exclusively.
156	pub fn new_longest_chain(backend: Arc<B>) -> Self {
157		gum::debug!(target: LOG_TARGET, "Using {} chain selection algorithm", "longest");
158
159		Self {
160			longest_chain: sc_consensus::LongestChain::new(backend.clone()),
161			selection: IsDisputesAwareWithOverseer::No,
162		}
163	}
164
165	/// Create a new [`SelectRelayChain`] wrapping the given chain backend
166	/// and a handle to the overseer.
167	pub fn new_with_overseer(
168		backend: Arc<B>,
169		overseer: Handle,
170		metrics: Metrics,
171		spawn_handle: Option<SpawnTaskHandle>,
172	) -> Self {
173		gum::debug!(target: LOG_TARGET, "Using dispute aware relay-chain selection algorithm",);
174
175		SelectRelayChain {
176			longest_chain: sc_consensus::LongestChain::new(backend.clone()),
177			selection: IsDisputesAwareWithOverseer::Yes(SelectRelayChainInner::new(
178				backend,
179				overseer,
180				metrics,
181				spawn_handle,
182			)),
183		}
184	}
185
186	/// Allow access to the inner chain, for usage during the node setup.
187	pub fn as_longest_chain(&self) -> &sc_consensus::LongestChain<B, PolkadotBlock> {
188		&self.longest_chain
189	}
190}
191
192#[async_trait::async_trait]
193impl<B> SelectChain<PolkadotBlock> for SelectRelayChain<B>
194where
195	B: sc_client_api::Backend<PolkadotBlock> + 'static,
196{
197	async fn leaves(&self) -> Result<Vec<Hash>, ConsensusError> {
198		match self.selection {
199			IsDisputesAwareWithOverseer::Yes(ref selection) => selection.leaves().await,
200			IsDisputesAwareWithOverseer::No => self.longest_chain.leaves().await,
201		}
202	}
203
204	async fn best_chain(&self) -> Result<PolkadotHeader, ConsensusError> {
205		match self.selection {
206			IsDisputesAwareWithOverseer::Yes(ref selection) => selection.best_chain().await,
207			IsDisputesAwareWithOverseer::No => self.longest_chain.best_chain().await,
208		}
209	}
210
211	async fn finality_target(
212		&self,
213		target_hash: Hash,
214		maybe_max_number: Option<BlockNumber>,
215	) -> Result<Hash, ConsensusError> {
216		if let IsDisputesAwareWithOverseer::Yes(ref selection) = self.selection {
217			selection
218				.finality_target_with_longest_chain(target_hash, maybe_max_number)
219				.await
220		} else {
221			self.longest_chain.finality_target(target_hash, maybe_max_number).await
222		}
223	}
224}
225
226/// A chain-selection implementation which provides safety for relay chains
227/// but does not handle situations where the overseer is not yet connected.
228pub struct SelectRelayChainInner<B, OH> {
229	backend: Arc<B>,
230	overseer: OH,
231	metrics: Metrics,
232	spawn_handle: Option<SpawnTaskHandle>,
233}
234
235impl<B, OH> SelectRelayChainInner<B, OH>
236where
237	B: HeaderProviderProvider<PolkadotBlock>,
238	OH: OverseerHandleT + OverseerHandleWithPriorityT,
239{
240	/// Create a new [`SelectRelayChainInner`] wrapping the given chain backend
241	/// and a handle to the overseer.
242	pub fn new(
243		backend: Arc<B>,
244		overseer: OH,
245		metrics: Metrics,
246		spawn_handle: Option<SpawnTaskHandle>,
247	) -> Self {
248		SelectRelayChainInner { backend, overseer, metrics, spawn_handle }
249	}
250
251	fn block_header(&self, hash: Hash) -> Result<PolkadotHeader, ConsensusError> {
252		match HeaderProvider::header(self.backend.header_provider(), hash) {
253			Ok(Some(header)) => Ok(header),
254			Ok(None) => {
255				Err(ConsensusError::ChainLookup(format!("Missing header with hash {:?}", hash,)))
256			},
257			Err(e) => Err(ConsensusError::ChainLookup(format!(
258				"Lookup failed for header with hash {:?}: {:?}",
259				hash, e,
260			))),
261		}
262	}
263
264	fn block_number(&self, hash: Hash) -> Result<BlockNumber, ConsensusError> {
265		match HeaderProvider::number(self.backend.header_provider(), hash) {
266			Ok(Some(number)) => Ok(number),
267			Ok(None) => {
268				Err(ConsensusError::ChainLookup(format!("Missing number with hash {:?}", hash,)))
269			},
270			Err(e) => Err(ConsensusError::ChainLookup(format!(
271				"Lookup failed for number with hash {:?}: {:?}",
272				hash, e,
273			))),
274		}
275	}
276}
277
278impl<B, OH> Clone for SelectRelayChainInner<B, OH>
279where
280	B: HeaderProviderProvider<PolkadotBlock> + Send + Sync,
281	OH: OverseerHandleT + OverseerHandleWithPriorityT,
282{
283	fn clone(&self) -> Self {
284		SelectRelayChainInner {
285			backend: self.backend.clone(),
286			overseer: self.overseer.clone(),
287			metrics: self.metrics.clone(),
288			spawn_handle: self.spawn_handle.clone(),
289		}
290	}
291}
292
293#[derive(thiserror::Error, Debug)]
294enum Error {
295	// Oneshot for requesting leaves from chain selection got canceled - check errors in that
296	// subsystem.
297	#[error("Request for leaves from chain selection got canceled")]
298	LeavesCanceled(oneshot::Canceled),
299	#[error("Request for leaves from chain selection got canceled")]
300	BestLeafContainingCanceled(oneshot::Canceled),
301	// Requesting recent disputes oneshot got canceled.
302	#[error("Request for determining the undisputed chain from DisputeCoordinator got canceled")]
303	DetermineUndisputedChainCanceled(oneshot::Canceled),
304	#[error("Request approved ancestor from approval voting got canceled")]
305	ApprovedAncestorCanceled(oneshot::Canceled),
306	/// Chain selection returned empty leaves.
307	#[error("ChainSelection returned no leaves")]
308	EmptyLeaves,
309}
310
311/// Decoupling trait for the overseer handle.
312///
313/// Required for testing purposes.
314#[async_trait::async_trait]
315pub trait OverseerHandleT: Clone + Send + Sync {
316	async fn send_msg<M: Send + Into<AllMessages>>(&mut self, msg: M, origin: &'static str);
317}
318
319/// Trait for the overseer handle that allows sending messages with the specified priority level.
320#[async_trait::async_trait]
321pub trait OverseerHandleWithPriorityT: Clone + Send + Sync {
322	async fn send_msg_with_priority<M: Send + Into<AllMessages>>(
323		&mut self,
324		msg: M,
325		origin: &'static str,
326		priority: PriorityLevel,
327	);
328}
329
330#[async_trait::async_trait]
331impl OverseerHandleT for Handle {
332	async fn send_msg<M: Send + Into<AllMessages>>(&mut self, msg: M, origin: &'static str) {
333		Handle::send_msg(self, msg, origin).await
334	}
335}
336
337#[async_trait::async_trait]
338impl OverseerHandleWithPriorityT for Handle {
339	async fn send_msg_with_priority<M: Send + Into<AllMessages>>(
340		&mut self,
341		msg: M,
342		origin: &'static str,
343		priority: PriorityLevel,
344	) {
345		Handle::send_msg_with_priority(self, msg, origin, priority).await
346	}
347}
348
349impl<B, OH> SelectRelayChainInner<B, OH>
350where
351	B: HeaderProviderProvider<PolkadotBlock>,
352	OH: OverseerHandleT + OverseerHandleWithPriorityT + 'static,
353{
354	/// Get all leaves of the chain, i.e. block hashes that are suitable to
355	/// build upon and have no suitable children.
356	async fn leaves(&self) -> Result<Vec<Hash>, ConsensusError> {
357		let (tx, rx) = oneshot::channel();
358
359		self.overseer
360			.clone()
361			.send_msg(ChainSelectionMessage::Leaves(tx), std::any::type_name::<Self>())
362			.await;
363
364		let leaves = rx
365			.await
366			.map_err(Error::LeavesCanceled)
367			.map_err(|e| ConsensusError::Other(Box::new(e)))?;
368
369		gum::trace!(target: LOG_TARGET, ?leaves, "Chain selection leaves");
370
371		Ok(leaves)
372	}
373
374	/// Among all leaves, pick the one which is the best chain to build upon.
375	async fn best_chain(&self) -> Result<PolkadotHeader, ConsensusError> {
376		// The Chain Selection subsystem is supposed to treat the finalized
377		// block as the best leaf in the case that there are no viable
378		// leaves, so this should not happen in practice.
379		let best_leaf = *self
380			.leaves()
381			.await?
382			.first()
383			.ok_or_else(|| ConsensusError::Other(Box::new(Error::EmptyLeaves)))?;
384
385		gum::trace!(target: LOG_TARGET, ?best_leaf, "Best chain");
386
387		self.block_header(best_leaf)
388	}
389
390	/// Get the best descendant of `target_hash` that we should attempt to
391	/// finalize next, if any. It is valid to return the `target_hash` if
392	/// no better block exists.
393	///
394	/// This will search all leaves to find the best one containing the
395	/// given target hash, and then constrain to the given block number.
396	///
397	/// It will also constrain the chain to only chains which are fully
398	/// approved, and chains which contain no disputes.
399	pub(crate) async fn finality_target_with_longest_chain(
400		&self,
401		target_hash: Hash,
402		maybe_max_number: Option<BlockNumber>,
403	) -> Result<Hash, ConsensusError> {
404		let mut overseer = self.overseer.clone();
405
406		let subchain_head = {
407			let (tx, rx) = oneshot::channel();
408			overseer
409				.send_msg(
410					ChainSelectionMessage::BestLeafContaining(target_hash, tx),
411					std::any::type_name::<Self>(),
412				)
413				.await;
414
415			let best = rx
416				.await
417				.map_err(Error::BestLeafContainingCanceled)
418				.map_err(|e| ConsensusError::Other(Box::new(e)))?;
419
420			gum::trace!(target: LOG_TARGET, ?best, "Best leaf containing");
421
422			match best {
423				// No viable leaves containing the block.
424				None => return Ok(target_hash),
425				Some(best) => best,
426			}
427		};
428
429		let target_number = self.block_number(target_hash)?;
430
431		// 1. Constrain the leaf according to `maybe_max_number`.
432		let subchain_head = match maybe_max_number {
433			None => subchain_head,
434			Some(max) => {
435				if max <= target_number {
436					if max < target_number {
437						gum::warn!(
438							LOG_TARGET,
439							max_number = max,
440							target_number,
441							"`finality_target` max number is less than target number",
442						);
443					}
444					return Ok(target_hash);
445				}
446				// find the current number.
447				let subchain_header = self.block_header(subchain_head)?;
448
449				if subchain_header.number <= max {
450					gum::trace!(target: LOG_TARGET, ?subchain_head, "Constrained sub-chain head",);
451					subchain_head
452				} else {
453					let (ancestor_hash, _) =
454						crate::grandpa_support::walk_backwards_to_target_block(
455							self.backend.header_provider(),
456							max,
457							&subchain_header,
458						)
459						.map_err(|e| ConsensusError::ChainLookup(format!("{:?}", e)))?;
460					gum::trace!(
461						target: LOG_TARGET,
462						?ancestor_hash,
463						"Grandpa walk backwards sub-chain head"
464					);
465					ancestor_hash
466				}
467			},
468		};
469
470		let initial_leaf = subchain_head;
471		let initial_leaf_number = self.block_number(initial_leaf)?;
472
473		// 2. Constrain according to `ApprovedAncestor`.
474		let (subchain_head, subchain_number, subchain_block_descriptions) = {
475			let (tx, rx) = oneshot::channel();
476			overseer
477				.send_msg_with_priority(
478					ApprovalVotingParallelMessage::ApprovedAncestor(
479						subchain_head,
480						target_number,
481						tx,
482					),
483					std::any::type_name::<Self>(),
484					PriorityLevel::High,
485				)
486				.await;
487
488			match rx
489				.await
490				.map_err(Error::ApprovedAncestorCanceled)
491				.map_err(|e| ConsensusError::Other(Box::new(e)))?
492			{
493				// No approved ancestors means target hash is maximal vote.
494				None => (target_hash, target_number, Vec::new()),
495				Some(HighestApprovedAncestorBlock { number, hash, descriptions }) => {
496					(hash, number, descriptions)
497				},
498			}
499		};
500
501		gum::trace!(target: LOG_TARGET, ?subchain_head, "Ancestor approval restriction applied",);
502
503		let lag = initial_leaf_number.saturating_sub(subchain_number);
504		self.metrics.note_approval_checking_finality_lag(lag);
505
506		// Messages sent to `approval-distribution` are known to have high `ToF`, we need to spawn a
507		// task for sending the message to not block here and delay finality.
508		if let Some(spawn_handle) = &self.spawn_handle {
509			let mut overseer_handle = self.overseer.clone();
510			let lag_update_task = async move {
511				overseer_handle
512					.send_msg_with_priority(
513						ApprovalVotingParallelMessage::ApprovalCheckingLagUpdate(lag),
514						std::any::type_name::<Self>(),
515						PriorityLevel::High,
516					)
517					.await;
518			};
519
520			spawn_handle.spawn(
521				"approval-checking-lag-update",
522				Some("relay-chain-selection"),
523				Box::pin(lag_update_task),
524			);
525		}
526
527		let (lag, subchain_head) = {
528			// Prevent sending flawed data to the dispute-coordinator.
529			if Some(subchain_block_descriptions.len() as _) !=
530				subchain_number.checked_sub(target_number)
531			{
532				gum::error!(
533					LOG_TARGET,
534					present_block_descriptions = subchain_block_descriptions.len(),
535					target_number,
536					subchain_number,
537					"Mismatch of anticipated block descriptions and block number difference.",
538				);
539				return Ok(target_hash);
540			}
541			// 3. Constrain according to disputes:
542			let (tx, rx) = oneshot::channel();
543			overseer
544				.send_msg_with_priority(
545					DisputeCoordinatorMessage::DetermineUndisputedChain {
546						base: (target_number, target_hash),
547						block_descriptions: subchain_block_descriptions,
548						tx,
549					},
550					std::any::type_name::<Self>(),
551					PriorityLevel::High,
552				)
553				.await;
554
555			// Try to fetch response from `dispute-coordinator`. If an error occurs we just log it
556			// and return `target_hash` as maximal vote. It is safer to contain this error here
557			// and not push it up the stack to cause additional issues in GRANDPA/BABE.
558			let (lag, subchain_head) =
559				match rx.await.map_err(Error::DetermineUndisputedChainCanceled) {
560					// If request succeeded we will receive (block number, block hash).
561					Ok((subchain_number, subchain_head)) => {
562						// The total lag accounting for disputes.
563						let lag_disputes = initial_leaf_number.saturating_sub(subchain_number);
564						self.metrics.note_disputes_finality_lag(lag_disputes);
565						(lag_disputes, subchain_head)
566					},
567					Err(e) => {
568						gum::error!(
569							target: LOG_TARGET,
570							error = ?e,
571							"Call to `DetermineUndisputedChain` failed",
572						);
573						// We need to return a sane finality target. But, we are unable to ensure we
574						// are not finalizing something that is being disputed or has been concluded
575						// as invalid. We will be conservative here and not vote for finality above
576						// the ancestor passed in.
577						return Ok(target_hash);
578					},
579				};
580			(lag, subchain_head)
581		};
582
583		gum::trace!(
584			target: LOG_TARGET,
585			?subchain_head,
586			"Disputed blocks in ancestry restriction applied",
587		);
588
589		// 4. Apply the maximum safeguard to the finality lag.
590		if lag > MAX_FINALITY_LAG {
591			// We need to constrain our vote as a safety net to
592			// ensure the network continues to finalize.
593			let safe_target = initial_leaf_number - MAX_FINALITY_LAG;
594
595			if safe_target <= target_number {
596				gum::warn!(target: LOG_TARGET, ?target_hash, "Safeguard enforced finalization");
597				// Minimal vote needs to be on the target number.
598				Ok(target_hash)
599			} else {
600				// Otherwise we're looking for a descendant.
601				let initial_leaf_header = self.block_header(initial_leaf)?;
602				let (forced_target, _) = crate::grandpa_support::walk_backwards_to_target_block(
603					self.backend.header_provider(),
604					safe_target,
605					&initial_leaf_header,
606				)
607				.map_err(|e| ConsensusError::ChainLookup(format!("{:?}", e)))?;
608
609				gum::warn!(
610					target: LOG_TARGET,
611					?forced_target,
612					"Safeguard enforced finalization of child"
613				);
614
615				Ok(forced_target)
616			}
617		} else {
618			Ok(subchain_head)
619		}
620	}
621}