polkadot_node_network_protocol/request_response/
mod.rs

1// Copyright (C) Parity Technologies (UK) Ltd.
2// This file is part of Polkadot.
3
4// Polkadot is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// Polkadot is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
16
//! Overview of the request/response protocols used in `Polkadot`.
18//!
19//! `enum Protocol` .... List of all supported protocols.
20//!
21//! `enum Requests`  .... List of all supported requests, each entry matches one in protocols, but
22//! has the actual request as payload.
23//!
24//! `struct IncomingRequest` .... wrapper for incoming requests, containing a sender for sending
25//! responses.
26//!
27//! `struct OutgoingRequest` .... wrapper for outgoing requests, containing a sender used by the
28//! networking code for delivering responses/delivery errors.
29//!
30//! `trait IsRequest` .... A trait describing a particular request. It is used for gathering meta
31//! data, like what is the corresponding response type.
32//!
//! ## Versioning
34//!
35//! Versioning for request-response protocols can be done in multiple ways.
36//!
37//! If you're just changing the protocol name but the binary payloads are the same, just add a new
38//! `fallback_name` to the protocol config.
39//!
40//! One way in which versioning has historically been achieved for req-response protocols is to
41//! bundle the new req-resp version with an upgrade of a notifications protocol. The subsystem would
42//! then know which request version to use based on stored data about the peer's notifications
43//! protocol version.
44//!
45//! When bumping a notifications protocol version is not needed/desirable, you may add a new
46//! req-resp protocol and set the old request as a fallback (see
47//! `OutgoingRequest::new_with_fallback`). A request with the new version will be attempted and if
48//! the protocol is refused by the peer, the fallback protocol request will be used.
49//! Information about the actually used protocol will be returned alongside the raw response, so
50//! that you know how to decode it.
51
52use std::{collections::HashMap, time::Duration, u64};
53
54use polkadot_primitives::MAX_CODE_SIZE;
55use sc_network::{NetworkBackend, MAX_RESPONSE_SIZE};
56use sp_runtime::traits::Block;
57use strum::{EnumIter, IntoEnumIterator};
58
59pub use sc_network::{config as network, config::RequestResponseConfig, ProtocolName};
60
61/// Everything related to handling of incoming requests.
62pub mod incoming;
63/// Everything related to handling of outgoing requests.
64pub mod outgoing;
65
66pub use incoming::{IncomingRequest, IncomingRequestReceiver};
67
68pub use outgoing::{OutgoingRequest, OutgoingResult, Recipient, Requests, ResponseSender};
69
70///// Multiplexer for incoming requests.
71// pub mod multiplexer;
72
73/// Actual versioned requests and responses that are sent over the wire.
74pub mod v1;
75
76/// Actual versioned requests and responses that are sent over the wire.
77pub mod v2;
78
79/// A protocol per subsystem seems to make the most sense, this way we don't need any dispatching
80/// within protocols.
81#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, EnumIter)]
82pub enum Protocol {
83	/// Protocol for chunk fetching, used by availability distribution and availability recovery.
84	ChunkFetchingV1,
85	/// Protocol for fetching collations from collators.
86	CollationFetchingV1,
87	/// Protocol for fetching collations from collators when async backing is enabled.
88	CollationFetchingV2,
89	/// Protocol for fetching seconded PoVs from validators of the same group.
90	PoVFetchingV1,
91	/// Protocol for fetching available data.
92	AvailableDataFetchingV1,
93	/// Sending of dispute statements with application level confirmations.
94	DisputeSendingV1,
95
96	/// Protocol for requesting candidates with attestations in statement distribution
97	/// when async backing is enabled.
98	AttestedCandidateV2,
99
100	/// Protocol for chunk fetching version 2, used by availability distribution and availability
101	/// recovery.
102	ChunkFetchingV2,
103}
104
/// Lower bound on the bandwidth we expect validators to have, in bytes per second.
///
/// The recommendation is 500Mbit/s, which is taken here as approximately 50MB per second
/// (exactly 50 MiB).
const MIN_BANDWIDTH_BYTES: u64 = 50 * 1024 * 1024;

/// Default request timeout (3 seconds).
///
/// Be careful when lowering this value: the very first request might have to open a connection
/// first, which can be slow. Should that turn out to be a problem, connectivity should be
/// ensured via peer sets instead.
// Not referenced by the code visible in this module, hence the lint exemption.
#[allow(dead_code)]
const DEFAULT_REQUEST_TIMEOUT: Duration = Duration::from_secs(3);

/// Request timeout (1 second) for cases where the connection can be assumed to be open already
/// (e.g. because the peers are part of a peer set as well).
const DEFAULT_REQUEST_TIMEOUT_CONNECTED: Duration = Duration::from_secs(1);

/// Timeout for requesting availability chunks.
///
/// Identical to [`DEFAULT_REQUEST_TIMEOUT_CONNECTED`].
pub const CHUNK_REQUEST_TIMEOUT: Duration = DEFAULT_REQUEST_TIMEOUT_CONNECTED;

/// Timeout (2 seconds) for PoV-sized requests on already established connections.
///
/// The value is derived from the following parameters, assuming asynchronous backing with no
/// time budget within a relay block:
/// - 500 Mbit/s networking speed
/// - 10 MB PoV
/// - 10 parallel executions
const POV_REQUEST_TIMEOUT_CONNECTED: Duration = Duration::from_secs(2);

/// Timeout (2.5 seconds) for attested candidate requests.
///
/// Attested candidate requests should time out relatively fast, because slow requests will
/// bottleneck the backing system. Ideally the timeout would adapt to the candidate size, as
/// candidate sizes vary a lot: candidates with no code and no messages vs candidates with code
/// and messages.
///
/// Some leniency is granted because large candidates are common and asynchronous backing allows
/// them to be included over a longer window of time. Exponential back-off up to a maximum of 10
/// seconds would be ideal, but isn't supported by the infrastructure here yet: see
/// <https://github.com/paritytech/polkadot/issues/6009>
const ATTESTED_CANDIDATE_TIMEOUT: Duration = Duration::from_millis(2500);

/// Maximum number of attested candidate requests served in parallel.
///
/// A slow peer should not slow down all the others, yet we want to get the data out quickly and
/// in full to at least some peers (this reduces load on us, as they can then start serving the
/// data themselves). The value is therefore a tradeoff and 5 seems to be sensible: it takes 5
/// slow nodes being connected to delay the transfer for others by
/// `ATTESTED_CANDIDATE_TIMEOUT`.
pub const MAX_PARALLEL_ATTESTED_CANDIDATE_REQUESTS: u32 = 5;
148
149/// Response size limit for responses of POV like data.
150///
151/// Same as what we use in substrate networking.
152const POV_RESPONSE_SIZE: u64 = MAX_RESPONSE_SIZE;
153
154/// Maximum response sizes for `AttestedCandidateV2`.
155///
156/// This is `MAX_CODE_SIZE` plus some additional space for protocol overhead and
157/// additional backing statements.
158const ATTESTED_CANDIDATE_RESPONSE_SIZE: u64 = MAX_CODE_SIZE as u64 + 100_000;
159
/// Timeout (12 seconds) for dispute requests.
///
/// A relatively large timeout is fine here: nothing is gained by hitting a timeout, as the
/// statements should get through to each node in any case.
pub const DISPUTE_REQUEST_TIMEOUT: Duration = Duration::from_secs(12);
163
164impl Protocol {
165	/// Get a configuration for a given Request response protocol.
166	///
167	/// Returns a `ProtocolConfig` for this protocol.
168	/// Use this if you plan only to send requests for this protocol.
169	pub fn get_outbound_only_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
170		self,
171		req_protocol_names: &ReqProtocolNames,
172	) -> N::RequestResponseProtocolConfig {
173		self.create_config::<B, N>(req_protocol_names, None)
174	}
175
176	/// Get a configuration for a given Request response protocol.
177	///
178	/// Returns a receiver for messages received on this protocol and the requested
179	/// `ProtocolConfig`.
180	pub fn get_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
181		self,
182		req_protocol_names: &ReqProtocolNames,
183	) -> (async_channel::Receiver<network::IncomingRequest>, N::RequestResponseProtocolConfig) {
184		let (tx, rx) = async_channel::bounded(self.get_channel_size());
185		let cfg = self.create_config::<B, N>(req_protocol_names, Some(tx));
186		(rx, cfg)
187	}
188
189	fn create_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
190		self,
191		req_protocol_names: &ReqProtocolNames,
192		tx: Option<async_channel::Sender<network::IncomingRequest>>,
193	) -> N::RequestResponseProtocolConfig {
194		let name = req_protocol_names.get_name(self);
195		let legacy_names = self.get_legacy_name().into_iter().map(Into::into).collect();
196		match self {
197			Protocol::ChunkFetchingV1 | Protocol::ChunkFetchingV2 => N::request_response_config(
198				name,
199				legacy_names,
200				1_000,
201				POV_RESPONSE_SIZE,
202				// We are connected to all validators:
203				CHUNK_REQUEST_TIMEOUT,
204				tx,
205			),
206			Protocol::CollationFetchingV1 | Protocol::CollationFetchingV2 =>
207				N::request_response_config(
208					name,
209					legacy_names,
210					1_000,
211					POV_RESPONSE_SIZE,
212					// Taken from initial implementation in collator protocol:
213					POV_REQUEST_TIMEOUT_CONNECTED,
214					tx,
215				),
216			Protocol::PoVFetchingV1 => N::request_response_config(
217				name,
218				legacy_names,
219				1_000,
220				POV_RESPONSE_SIZE,
221				POV_REQUEST_TIMEOUT_CONNECTED,
222				tx,
223			),
224			Protocol::AvailableDataFetchingV1 => N::request_response_config(
225				name,
226				legacy_names,
227				1_000,
228				// Available data size is dominated by the PoV size.
229				POV_RESPONSE_SIZE,
230				POV_REQUEST_TIMEOUT_CONNECTED,
231				tx,
232			),
233			Protocol::DisputeSendingV1 => N::request_response_config(
234				name,
235				legacy_names,
236				1_000,
237				// Responses are just confirmation, in essence not even a bit. So 100 seems
238				// plenty.
239				100,
240				DISPUTE_REQUEST_TIMEOUT,
241				tx,
242			),
243			Protocol::AttestedCandidateV2 => N::request_response_config(
244				name,
245				legacy_names,
246				1_000,
247				ATTESTED_CANDIDATE_RESPONSE_SIZE,
248				ATTESTED_CANDIDATE_TIMEOUT,
249				tx,
250			),
251		}
252	}
253
254	// Channel sizes for the supported protocols.
255	fn get_channel_size(self) -> usize {
256		match self {
257			// Hundreds of validators will start requesting their chunks once they see a candidate
258			// awaiting availability on chain. Given that they will see that block at different
259			// times (due to network delays), 100 seems big enough to accommodate for "bursts",
260			// assuming we can service requests relatively quickly, which would need to be measured
261			// as well.
262			Protocol::ChunkFetchingV1 | Protocol::ChunkFetchingV2 => 100,
263			// 10 seems reasonable, considering group sizes of max 10 validators.
264			Protocol::CollationFetchingV1 | Protocol::CollationFetchingV2 => 10,
265			// 10 seems reasonable, considering group sizes of max 10 validators.
266			Protocol::PoVFetchingV1 => 10,
267			// Validators are constantly self-selecting to request available data which may lead
268			// to constant load and occasional burstiness.
269			Protocol::AvailableDataFetchingV1 => 100,
270			// Incoming requests can get bursty, we should also be able to handle them fast on
271			// average, so something in the ballpark of 100 should be fine. Nodes will retry on
272			// failure, so having a good value here is mostly about performance tuning.
273			Protocol::DisputeSendingV1 => 100,
274
275			Protocol::AttestedCandidateV2 => {
276				// We assume we can utilize up to 70% of the available bandwidth for statements.
277				// This is just a guess/estimate, with the following considerations: If we are
278				// faster than that, queue size will stay low anyway, even if not - requesters will
279				// get an immediate error, but if we are slower, requesters will run in a timeout -
280				// wasting precious time.
281				let available_bandwidth = 7 * MIN_BANDWIDTH_BYTES / 10;
282				let size = u64::saturating_sub(
283					ATTESTED_CANDIDATE_TIMEOUT.as_millis() as u64 * available_bandwidth /
284						(1000 * MAX_CODE_SIZE as u64),
285					MAX_PARALLEL_ATTESTED_CANDIDATE_REQUESTS as u64,
286				);
287				debug_assert!(
288					size > 0,
289					"We should have a channel size greater zero, otherwise we won't accept any requests."
290				);
291				size as usize
292			},
293		}
294	}
295
296	/// Legacy protocol name associated with each peer set, if any.
297	/// The request will be tried on this legacy protocol name if the remote refuses to speak the
298	/// protocol.
299	const fn get_legacy_name(self) -> Option<&'static str> {
300		match self {
301			Protocol::ChunkFetchingV1 => Some("/polkadot/req_chunk/1"),
302			Protocol::CollationFetchingV1 => Some("/polkadot/req_collation/1"),
303			Protocol::PoVFetchingV1 => Some("/polkadot/req_pov/1"),
304			Protocol::AvailableDataFetchingV1 => Some("/polkadot/req_available_data/1"),
305			Protocol::DisputeSendingV1 => Some("/polkadot/send_dispute/1"),
306
307			// Introduced after legacy names became legacy.
308			Protocol::AttestedCandidateV2 => None,
309			Protocol::CollationFetchingV2 => None,
310			Protocol::ChunkFetchingV2 => None,
311		}
312	}
313}
314
315/// Common properties of any `Request`.
316pub trait IsRequest {
317	/// Each request has a corresponding `Response`.
318	type Response;
319
320	/// What protocol this `Request` implements.
321	const PROTOCOL: Protocol;
322}
323
324/// Type for getting on the wire [`Protocol`] names using genesis hash & fork id.
325#[derive(Clone)]
326pub struct ReqProtocolNames {
327	names: HashMap<Protocol, ProtocolName>,
328}
329
330impl ReqProtocolNames {
331	/// Construct [`ReqProtocolNames`] from `genesis_hash` and `fork_id`.
332	pub fn new<Hash: AsRef<[u8]>>(genesis_hash: Hash, fork_id: Option<&str>) -> Self {
333		let mut names = HashMap::new();
334		for protocol in Protocol::iter() {
335			names.insert(protocol, Self::generate_name(protocol, &genesis_hash, fork_id));
336		}
337		Self { names }
338	}
339
340	/// Get on the wire [`Protocol`] name.
341	pub fn get_name(&self, protocol: Protocol) -> ProtocolName {
342		self.names
343			.get(&protocol)
344			.expect("All `Protocol` enum variants are added above via `strum`; qed")
345			.clone()
346	}
347
348	/// Protocol name of this protocol based on `genesis_hash` and `fork_id`.
349	fn generate_name<Hash: AsRef<[u8]>>(
350		protocol: Protocol,
351		genesis_hash: &Hash,
352		fork_id: Option<&str>,
353	) -> ProtocolName {
354		let prefix = if let Some(fork_id) = fork_id {
355			format!("/{}/{}", hex::encode(genesis_hash), fork_id)
356		} else {
357			format!("/{}", hex::encode(genesis_hash))
358		};
359
360		let short_name = match protocol {
361			// V1:
362			Protocol::ChunkFetchingV1 => "/req_chunk/1",
363			Protocol::CollationFetchingV1 => "/req_collation/1",
364			Protocol::PoVFetchingV1 => "/req_pov/1",
365			Protocol::AvailableDataFetchingV1 => "/req_available_data/1",
366			Protocol::DisputeSendingV1 => "/send_dispute/1",
367
368			// V2:
369			Protocol::CollationFetchingV2 => "/req_collation/2",
370			Protocol::AttestedCandidateV2 => "/req_attested_candidate/2",
371			Protocol::ChunkFetchingV2 => "/req_chunk/2",
372		};
373
374		format!("{}{}", prefix, short_name).into()
375	}
376}
polkadot_node_network_protocol/request_response/mod.rs

polkadot_node_network_protocol/request_response/
mod.rs