// Source file: polkadot_service/relay_chain_selection.rs
1// Copyright (C) Parity Technologies (UK) Ltd.
2// This file is part of Polkadot.
3
4// Polkadot is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// Polkadot is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
16
17//! A [`SelectChain`] implementation designed for relay chains.
18//!
19//! This uses information about parachains to inform GRANDPA and BABE
20//! about blocks which are safe to build on and blocks which are safe to
21//! finalize.
22//!
23//! To learn more about chain-selection rules for Relay Chains, please see the
24//! documentation on [chain-selection][chain-selection-guide]
25//! in the implementers' guide.
26//!
27//! This is mostly a wrapper around a subsystem which implements the
28//! chain-selection rule, which leaves the code to be very simple.
29//!
30//! However, this does apply the further finality constraints to the best
31//! leaf returned from the chain selection subsystem by calling into other
32//! subsystems which yield information about approvals and disputes.
33//!
34//! [chain-selection-guide]: https://paritytech.github.io/polkadot-sdk/book/protocol-chain-selection.html
35
36#![cfg(feature = "full-node")]
37
38use super::{HeaderProvider, HeaderProviderProvider};
39use futures::channel::oneshot;
40use polkadot_node_primitives::MAX_FINALITY_LAG as PRIMITIVES_MAX_FINALITY_LAG;
41use polkadot_node_subsystem::messages::{
42	ApprovalVotingParallelMessage, ChainSelectionMessage, DisputeCoordinatorMessage,
43	HighestApprovedAncestorBlock,
44};
45use polkadot_node_subsystem_util::metrics::{self, prometheus};
46use polkadot_overseer::{AllMessages, Handle, PriorityLevel};
47use polkadot_primitives::{Block as PolkadotBlock, BlockNumber, Hash, Header as PolkadotHeader};
48use sp_consensus::{Error as ConsensusError, SelectChain};
49use std::sync::Arc;
50
51pub use sc_service::SpawnTaskHandle;
52
/// The maximum amount of unfinalized blocks we are willing to allow due to approval checking
/// or disputes.
///
/// This is a safety net that should be removed at some point in the future.
// In sync with `MAX_HEADS_LOOK_BACK` in `approval-voting`
// and `MAX_BATCH_SCRAPE_ANCESTORS` in `dispute-coordinator`.
const MAX_FINALITY_LAG: polkadot_primitives::BlockNumber = PRIMITIVES_MAX_FINALITY_LAG;

/// Log target used by all tracing output in this module.
const LOG_TARGET: &str = "parachain::chain-selection";
62
/// Prometheus metrics for chain-selection.
///
/// Holds `None` when no Prometheus registry was provided, in which case all
/// recording methods are no-ops.
#[derive(Debug, Default, Clone)]
pub struct Metrics(Option<MetricsInner>);

/// The actual registered gauges backing [`Metrics`].
#[derive(Debug, Clone)]
struct MetricsInner {
	// Gauge: blocks between the best leaf and the highest approved ancestor.
	approval_checking_finality_lag: prometheus::Gauge<prometheus::U64>,
	// Gauge: blocks between the best leaf and the highest undisputed ancestor.
	disputes_finality_lag: prometheus::Gauge<prometheus::U64>,
}
72
73impl metrics::Metrics for Metrics {
74	fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError> {
75		let metrics = MetricsInner {
76			approval_checking_finality_lag: prometheus::register(
77				prometheus::Gauge::with_opts(
78					prometheus::Opts::new(
79						"polkadot_parachain_approval_checking_finality_lag",
80						"How far behind the head of the chain the Approval Checking protocol wants to vote",
81					)
82				)?,
83				registry,
84			)?,
85			disputes_finality_lag: prometheus::register(
86				prometheus::Gauge::with_opts(
87					prometheus::Opts::new(
88						"polkadot_parachain_disputes_finality_lag",
89						"How far behind the head of the chain the Disputes protocol wants to vote",
90					)
91				)?,
92				registry,
93			)?,
94		};
95
96		Ok(Metrics(Some(metrics)))
97	}
98}
99
100impl Metrics {
101	fn note_approval_checking_finality_lag(&self, lag: BlockNumber) {
102		if let Some(ref metrics) = self.0 {
103			metrics.approval_checking_finality_lag.set(lag as _);
104		}
105	}
106
107	fn note_disputes_finality_lag(&self, lag: BlockNumber) {
108		if let Some(ref metrics) = self.0 {
109			metrics.disputes_finality_lag.set(lag as _);
110		}
111	}
112}
113
/// Determines whether the chain is a relay chain
/// and hence has to take approval votes and disputes
/// into account.
enum IsDisputesAwareWithOverseer<B: sc_client_api::Backend<PolkadotBlock>> {
	/// Dispute-aware selection, backed by an overseer handle.
	Yes(SelectRelayChainInner<B, Handle>),
	/// Plain longest-chain selection; the wrapper falls back to `LongestChain`.
	No,
}
121
122impl<B> Clone for IsDisputesAwareWithOverseer<B>
123where
124	B: sc_client_api::Backend<PolkadotBlock>,
125	SelectRelayChainInner<B, Handle>: Clone,
126{
127	fn clone(&self) -> Self {
128		match self {
129			Self::Yes(ref inner) => Self::Yes(inner.clone()),
130			Self::No => Self::No,
131		}
132	}
133}
134
/// A chain-selection implementation which provides safety for relay chains.
pub struct SelectRelayChain<B: sc_client_api::Backend<PolkadotBlock>> {
	// Fallback / baseline rule; always constructed, also used during node setup.
	longest_chain: sc_consensus::LongestChain<B, PolkadotBlock>,
	// Whether the dispute-aware subsystem-backed rule is active.
	selection: IsDisputesAwareWithOverseer<B>,
}
140
141impl<B> Clone for SelectRelayChain<B>
142where
143	B: sc_client_api::Backend<PolkadotBlock>,
144	SelectRelayChainInner<B, Handle>: Clone,
145{
146	fn clone(&self) -> Self {
147		Self { longest_chain: self.longest_chain.clone(), selection: self.selection.clone() }
148	}
149}
150
151impl<B> SelectRelayChain<B>
152where
153	B: sc_client_api::Backend<PolkadotBlock> + 'static,
154{
155	/// Use the plain longest chain algorithm exclusively.
156	pub fn new_longest_chain(backend: Arc<B>) -> Self {
157		gum::debug!(target: LOG_TARGET, "Using {} chain selection algorithm", "longest");
158
159		Self {
160			longest_chain: sc_consensus::LongestChain::new(backend.clone()),
161			selection: IsDisputesAwareWithOverseer::No,
162		}
163	}
164
165	/// Create a new [`SelectRelayChain`] wrapping the given chain backend
166	/// and a handle to the overseer.
167	pub fn new_with_overseer(
168		backend: Arc<B>,
169		overseer: Handle,
170		metrics: Metrics,
171		spawn_handle: Option<SpawnTaskHandle>,
172	) -> Self {
173		gum::debug!(target: LOG_TARGET, "Using dispute aware relay-chain selection algorithm",);
174
175		SelectRelayChain {
176			longest_chain: sc_consensus::LongestChain::new(backend.clone()),
177			selection: IsDisputesAwareWithOverseer::Yes(SelectRelayChainInner::new(
178				backend,
179				overseer,
180				metrics,
181				spawn_handle,
182			)),
183		}
184	}
185
186	/// Allow access to the inner chain, for usage during the node setup.
187	pub fn as_longest_chain(&self) -> &sc_consensus::LongestChain<B, PolkadotBlock> {
188		&self.longest_chain
189	}
190}
191
192#[async_trait::async_trait]
193impl<B> SelectChain<PolkadotBlock> for SelectRelayChain<B>
194where
195	B: sc_client_api::Backend<PolkadotBlock> + 'static,
196{
197	async fn leaves(&self) -> Result<Vec<Hash>, ConsensusError> {
198		match self.selection {
199			IsDisputesAwareWithOverseer::Yes(ref selection) => selection.leaves().await,
200			IsDisputesAwareWithOverseer::No => self.longest_chain.leaves().await,
201		}
202	}
203
204	async fn best_chain(&self) -> Result<PolkadotHeader, ConsensusError> {
205		match self.selection {
206			IsDisputesAwareWithOverseer::Yes(ref selection) => selection.best_chain().await,
207			IsDisputesAwareWithOverseer::No => self.longest_chain.best_chain().await,
208		}
209	}
210
211	async fn finality_target(
212		&self,
213		target_hash: Hash,
214		maybe_max_number: Option<BlockNumber>,
215	) -> Result<Hash, ConsensusError> {
216		if let IsDisputesAwareWithOverseer::Yes(ref selection) = self.selection {
217			selection
218				.finality_target_with_longest_chain(target_hash, maybe_max_number)
219				.await
220		} else {
221			self.longest_chain.finality_target(target_hash, maybe_max_number).await
222		}
223	}
224}
225
/// A chain-selection implementation which provides safety for relay chains
/// but does not handle situations where the overseer is not yet connected.
pub struct SelectRelayChainInner<B, OH> {
	// Chain backend used to look up headers and block numbers.
	backend: Arc<B>,
	// Handle used to message the chain-selection, approval-voting and
	// dispute-coordinator subsystems.
	overseer: OH,
	// Finality-lag metrics; no-op when no registry was supplied.
	metrics: Metrics,
	// Optional spawner; when present, the approval-lag update message is sent
	// from a spawned task instead of inline.
	spawn_handle: Option<SpawnTaskHandle>,
}
234
235impl<B, OH> SelectRelayChainInner<B, OH>
236where
237	B: HeaderProviderProvider<PolkadotBlock>,
238	OH: OverseerHandleT + OverseerHandleWithPriorityT,
239{
240	/// Create a new [`SelectRelayChainInner`] wrapping the given chain backend
241	/// and a handle to the overseer.
242	pub fn new(
243		backend: Arc<B>,
244		overseer: OH,
245		metrics: Metrics,
246		spawn_handle: Option<SpawnTaskHandle>,
247	) -> Self {
248		SelectRelayChainInner { backend, overseer, metrics, spawn_handle }
249	}
250
251	fn block_header(&self, hash: Hash) -> Result<PolkadotHeader, ConsensusError> {
252		match HeaderProvider::header(self.backend.header_provider(), hash) {
253			Ok(Some(header)) => Ok(header),
254			Ok(None) =>
255				Err(ConsensusError::ChainLookup(format!("Missing header with hash {:?}", hash,))),
256			Err(e) => Err(ConsensusError::ChainLookup(format!(
257				"Lookup failed for header with hash {:?}: {:?}",
258				hash, e,
259			))),
260		}
261	}
262
263	fn block_number(&self, hash: Hash) -> Result<BlockNumber, ConsensusError> {
264		match HeaderProvider::number(self.backend.header_provider(), hash) {
265			Ok(Some(number)) => Ok(number),
266			Ok(None) =>
267				Err(ConsensusError::ChainLookup(format!("Missing number with hash {:?}", hash,))),
268			Err(e) => Err(ConsensusError::ChainLookup(format!(
269				"Lookup failed for number with hash {:?}: {:?}",
270				hash, e,
271			))),
272		}
273	}
274}
275
276impl<B, OH> Clone for SelectRelayChainInner<B, OH>
277where
278	B: HeaderProviderProvider<PolkadotBlock> + Send + Sync,
279	OH: OverseerHandleT + OverseerHandleWithPriorityT,
280{
281	fn clone(&self) -> Self {
282		SelectRelayChainInner {
283			backend: self.backend.clone(),
284			overseer: self.overseer.clone(),
285			metrics: self.metrics.clone(),
286			spawn_handle: self.spawn_handle.clone(),
287		}
288	}
289}
290
291#[derive(thiserror::Error, Debug)]
292enum Error {
293	// Oneshot for requesting leaves from chain selection got canceled - check errors in that
294	// subsystem.
295	#[error("Request for leaves from chain selection got canceled")]
296	LeavesCanceled(oneshot::Canceled),
297	#[error("Request for leaves from chain selection got canceled")]
298	BestLeafContainingCanceled(oneshot::Canceled),
299	// Requesting recent disputes oneshot got canceled.
300	#[error("Request for determining the undisputed chain from DisputeCoordinator got canceled")]
301	DetermineUndisputedChainCanceled(oneshot::Canceled),
302	#[error("Request approved ancestor from approval voting got canceled")]
303	ApprovedAncestorCanceled(oneshot::Canceled),
304	/// Chain selection returned empty leaves.
305	#[error("ChainSelection returned no leaves")]
306	EmptyLeaves,
307}
308
/// Decoupling trait for the overseer handle.
///
/// Required for testing purposes.
#[async_trait::async_trait]
pub trait OverseerHandleT: Clone + Send + Sync {
	/// Send `msg` to the overseer at default priority. `origin` is a static
	/// label identifying the sender (forwarded to the underlying handle).
	async fn send_msg<M: Send + Into<AllMessages>>(&mut self, msg: M, origin: &'static str);
}
316
/// Trait for the overseer handle that allows sending messages with the specified priority level.
#[async_trait::async_trait]
pub trait OverseerHandleWithPriorityT: Clone + Send + Sync {
	/// Send `msg` to the overseer at the given [`PriorityLevel`]. `origin` is a
	/// static label identifying the sender (forwarded to the underlying handle).
	async fn send_msg_with_priority<M: Send + Into<AllMessages>>(
		&mut self,
		msg: M,
		origin: &'static str,
		priority: PriorityLevel,
	);
}
327
#[async_trait::async_trait]
impl OverseerHandleT for Handle {
	// Plain delegation to the concrete overseer `Handle`.
	async fn send_msg<M: Send + Into<AllMessages>>(&mut self, msg: M, origin: &'static str) {
		Handle::send_msg(self, msg, origin).await
	}
}
334
#[async_trait::async_trait]
impl OverseerHandleWithPriorityT for Handle {
	// Plain delegation to the concrete overseer `Handle`, preserving the priority.
	async fn send_msg_with_priority<M: Send + Into<AllMessages>>(
		&mut self,
		msg: M,
		origin: &'static str,
		priority: PriorityLevel,
	) {
		Handle::send_msg_with_priority(self, msg, origin, priority).await
	}
}
346
347impl<B, OH> SelectRelayChainInner<B, OH>
348where
349	B: HeaderProviderProvider<PolkadotBlock>,
350	OH: OverseerHandleT + OverseerHandleWithPriorityT + 'static,
351{
352	/// Get all leaves of the chain, i.e. block hashes that are suitable to
353	/// build upon and have no suitable children.
354	async fn leaves(&self) -> Result<Vec<Hash>, ConsensusError> {
355		let (tx, rx) = oneshot::channel();
356
357		self.overseer
358			.clone()
359			.send_msg(ChainSelectionMessage::Leaves(tx), std::any::type_name::<Self>())
360			.await;
361
362		let leaves = rx
363			.await
364			.map_err(Error::LeavesCanceled)
365			.map_err(|e| ConsensusError::Other(Box::new(e)))?;
366
367		gum::trace!(target: LOG_TARGET, ?leaves, "Chain selection leaves");
368
369		Ok(leaves)
370	}
371
372	/// Among all leaves, pick the one which is the best chain to build upon.
373	async fn best_chain(&self) -> Result<PolkadotHeader, ConsensusError> {
374		// The Chain Selection subsystem is supposed to treat the finalized
375		// block as the best leaf in the case that there are no viable
376		// leaves, so this should not happen in practice.
377		let best_leaf = *self
378			.leaves()
379			.await?
380			.first()
381			.ok_or_else(|| ConsensusError::Other(Box::new(Error::EmptyLeaves)))?;
382
383		gum::trace!(target: LOG_TARGET, ?best_leaf, "Best chain");
384
385		self.block_header(best_leaf)
386	}
387
388	/// Get the best descendant of `target_hash` that we should attempt to
389	/// finalize next, if any. It is valid to return the `target_hash` if
390	/// no better block exists.
391	///
392	/// This will search all leaves to find the best one containing the
393	/// given target hash, and then constrain to the given block number.
394	///
395	/// It will also constrain the chain to only chains which are fully
396	/// approved, and chains which contain no disputes.
397	pub(crate) async fn finality_target_with_longest_chain(
398		&self,
399		target_hash: Hash,
400		maybe_max_number: Option<BlockNumber>,
401	) -> Result<Hash, ConsensusError> {
402		let mut overseer = self.overseer.clone();
403
404		let subchain_head = {
405			let (tx, rx) = oneshot::channel();
406			overseer
407				.send_msg(
408					ChainSelectionMessage::BestLeafContaining(target_hash, tx),
409					std::any::type_name::<Self>(),
410				)
411				.await;
412
413			let best = rx
414				.await
415				.map_err(Error::BestLeafContainingCanceled)
416				.map_err(|e| ConsensusError::Other(Box::new(e)))?;
417
418			gum::trace!(target: LOG_TARGET, ?best, "Best leaf containing");
419
420			match best {
421				// No viable leaves containing the block.
422				None => return Ok(target_hash),
423				Some(best) => best,
424			}
425		};
426
427		let target_number = self.block_number(target_hash)?;
428
429		// 1. Constrain the leaf according to `maybe_max_number`.
430		let subchain_head = match maybe_max_number {
431			None => subchain_head,
432			Some(max) => {
433				if max <= target_number {
434					if max < target_number {
435						gum::warn!(
436							LOG_TARGET,
437							max_number = max,
438							target_number,
439							"`finality_target` max number is less than target number",
440						);
441					}
442					return Ok(target_hash)
443				}
444				// find the current number.
445				let subchain_header = self.block_header(subchain_head)?;
446
447				if subchain_header.number <= max {
448					gum::trace!(target: LOG_TARGET, ?subchain_head, "Constrained sub-chain head",);
449					subchain_head
450				} else {
451					let (ancestor_hash, _) =
452						crate::grandpa_support::walk_backwards_to_target_block(
453							self.backend.header_provider(),
454							max,
455							&subchain_header,
456						)
457						.map_err(|e| ConsensusError::ChainLookup(format!("{:?}", e)))?;
458					gum::trace!(
459						target: LOG_TARGET,
460						?ancestor_hash,
461						"Grandpa walk backwards sub-chain head"
462					);
463					ancestor_hash
464				}
465			},
466		};
467
468		let initial_leaf = subchain_head;
469		let initial_leaf_number = self.block_number(initial_leaf)?;
470
471		// 2. Constrain according to `ApprovedAncestor`.
472		let (subchain_head, subchain_number, subchain_block_descriptions) = {
473			let (tx, rx) = oneshot::channel();
474			overseer
475				.send_msg_with_priority(
476					ApprovalVotingParallelMessage::ApprovedAncestor(
477						subchain_head,
478						target_number,
479						tx,
480					),
481					std::any::type_name::<Self>(),
482					PriorityLevel::High,
483				)
484				.await;
485
486			match rx
487				.await
488				.map_err(Error::ApprovedAncestorCanceled)
489				.map_err(|e| ConsensusError::Other(Box::new(e)))?
490			{
491				// No approved ancestors means target hash is maximal vote.
492				None => (target_hash, target_number, Vec::new()),
493				Some(HighestApprovedAncestorBlock { number, hash, descriptions }) =>
494					(hash, number, descriptions),
495			}
496		};
497
498		gum::trace!(target: LOG_TARGET, ?subchain_head, "Ancestor approval restriction applied",);
499
500		let lag = initial_leaf_number.saturating_sub(subchain_number);
501		self.metrics.note_approval_checking_finality_lag(lag);
502
503		// Messages sent to `approval-distribution` are known to have high `ToF`, we need to spawn a
504		// task for sending the message to not block here and delay finality.
505		if let Some(spawn_handle) = &self.spawn_handle {
506			let mut overseer_handle = self.overseer.clone();
507			let lag_update_task = async move {
508				overseer_handle
509					.send_msg_with_priority(
510						ApprovalVotingParallelMessage::ApprovalCheckingLagUpdate(lag),
511						std::any::type_name::<Self>(),
512						PriorityLevel::High,
513					)
514					.await;
515			};
516
517			spawn_handle.spawn(
518				"approval-checking-lag-update",
519				Some("relay-chain-selection"),
520				Box::pin(lag_update_task),
521			);
522		}
523
524		let (lag, subchain_head) = {
525			// Prevent sending flawed data to the dispute-coordinator.
526			if Some(subchain_block_descriptions.len() as _) !=
527				subchain_number.checked_sub(target_number)
528			{
529				gum::error!(
530					LOG_TARGET,
531					present_block_descriptions = subchain_block_descriptions.len(),
532					target_number,
533					subchain_number,
534					"Mismatch of anticipated block descriptions and block number difference.",
535				);
536				return Ok(target_hash)
537			}
538			// 3. Constrain according to disputes:
539			let (tx, rx) = oneshot::channel();
540			overseer
541				.send_msg_with_priority(
542					DisputeCoordinatorMessage::DetermineUndisputedChain {
543						base: (target_number, target_hash),
544						block_descriptions: subchain_block_descriptions,
545						tx,
546					},
547					std::any::type_name::<Self>(),
548					PriorityLevel::High,
549				)
550				.await;
551
552			// Try to fetch response from `dispute-coordinator`. If an error occurs we just log it
553			// and return `target_hash` as maximal vote. It is safer to contain this error here
554			// and not push it up the stack to cause additional issues in GRANDPA/BABE.
555			let (lag, subchain_head) =
556				match rx.await.map_err(Error::DetermineUndisputedChainCanceled) {
557					// If request succeeded we will receive (block number, block hash).
558					Ok((subchain_number, subchain_head)) => {
559						// The total lag accounting for disputes.
560						let lag_disputes = initial_leaf_number.saturating_sub(subchain_number);
561						self.metrics.note_disputes_finality_lag(lag_disputes);
562						(lag_disputes, subchain_head)
563					},
564					Err(e) => {
565						gum::error!(
566							target: LOG_TARGET,
567							error = ?e,
568							"Call to `DetermineUndisputedChain` failed",
569						);
570						// We need to return a sane finality target. But, we are unable to ensure we
571						// are not finalizing something that is being disputed or has been concluded
572						// as invalid. We will be conservative here and not vote for finality above
573						// the ancestor passed in.
574						return Ok(target_hash)
575					},
576				};
577			(lag, subchain_head)
578		};
579
580		gum::trace!(
581			target: LOG_TARGET,
582			?subchain_head,
583			"Disputed blocks in ancestry restriction applied",
584		);
585
586		// 4. Apply the maximum safeguard to the finality lag.
587		if lag > MAX_FINALITY_LAG {
588			// We need to constrain our vote as a safety net to
589			// ensure the network continues to finalize.
590			let safe_target = initial_leaf_number - MAX_FINALITY_LAG;
591
592			if safe_target <= target_number {
593				gum::warn!(target: LOG_TARGET, ?target_hash, "Safeguard enforced finalization");
594				// Minimal vote needs to be on the target number.
595				Ok(target_hash)
596			} else {
597				// Otherwise we're looking for a descendant.
598				let initial_leaf_header = self.block_header(initial_leaf)?;
599				let (forced_target, _) = crate::grandpa_support::walk_backwards_to_target_block(
600					self.backend.header_provider(),
601					safe_target,
602					&initial_leaf_header,
603				)
604				.map_err(|e| ConsensusError::ChainLookup(format!("{:?}", e)))?;
605
606				gum::warn!(
607					target: LOG_TARGET,
608					?forced_target,
609					"Safeguard enforced finalization of child"
610				);
611
612				Ok(forced_target)
613			}
614		} else {
615			Ok(subchain_head)
616		}
617	}
618}