polkadot_node_network_protocol/request_response/mod.rs
1// Copyright (C) Parity Technologies (UK) Ltd.
2// This file is part of Polkadot.
3
4// Polkadot is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// Polkadot is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
16
//! Overview of request/responses as used in `Polkadot`.
18//!
19//! `enum Protocol` .... List of all supported protocols.
20//!
21//! `enum Requests` .... List of all supported requests, each entry matches one in protocols, but
22//! has the actual request as payload.
23//!
24//! `struct IncomingRequest` .... wrapper for incoming requests, containing a sender for sending
25//! responses.
26//!
27//! `struct OutgoingRequest` .... wrapper for outgoing requests, containing a sender used by the
28//! networking code for delivering responses/delivery errors.
29//!
30//! `trait IsRequest` .... A trait describing a particular request. It is used for gathering meta
31//! data, like what is the corresponding response type.
32//!
33//! ## Versioning
34//!
35//! Versioning for request-response protocols can be done in multiple ways.
36//!
37//! If you're just changing the protocol name but the binary payloads are the same, just add a new
38//! `fallback_name` to the protocol config.
39//!
40//! One way in which versioning has historically been achieved for req-response protocols is to
41//! bundle the new req-resp version with an upgrade of a notifications protocol. The subsystem would
42//! then know which request version to use based on stored data about the peer's notifications
43//! protocol version.
44//!
45//! When bumping a notifications protocol version is not needed/desirable, you may add a new
46//! req-resp protocol and set the old request as a fallback (see
47//! `OutgoingRequest::new_with_fallback`). A request with the new version will be attempted and if
48//! the protocol is refused by the peer, the fallback protocol request will be used.
49//! Information about the actually used protocol will be returned alongside the raw response, so
50//! that you know how to decode it.
51
52use std::{collections::HashMap, time::Duration, u64};
53
54use sc_network::{NetworkBackend, MAX_RESPONSE_SIZE};
55use sp_runtime::traits::Block;
56use strum::{EnumIter, IntoEnumIterator};
57
58pub use sc_network::{config as network, config::RequestResponseConfig, ProtocolName};
59
60/// Everything related to handling of incoming requests.
61pub mod incoming;
62/// Everything related to handling of outgoing requests.
63pub mod outgoing;
64
65pub use incoming::{IncomingRequest, IncomingRequestReceiver};
66
67pub use outgoing::{OutgoingRequest, OutgoingResult, Recipient, Requests, ResponseSender};
68
69///// Multiplexer for incoming requests.
70// pub mod multiplexer;
71
72/// Actual versioned requests and responses that are sent over the wire.
73pub mod v1;
74
75/// Actual versioned requests and responses that are sent over the wire.
76pub mod v2;
77
78/// A protocol per subsystem seems to make the most sense, this way we don't need any dispatching
79/// within protocols.
80#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, EnumIter)]
81pub enum Protocol {
82 /// Protocol for chunk fetching, used by availability distribution and availability recovery.
83 ChunkFetchingV1,
84 /// Protocol for fetching collations from collators.
85 CollationFetchingV1,
86 /// Protocol for fetching collations from collators when async backing is enabled.
87 CollationFetchingV2,
88 /// Protocol for fetching seconded PoVs from validators of the same group.
89 PoVFetchingV1,
90 /// Protocol for fetching available data.
91 AvailableDataFetchingV1,
92 /// Sending of dispute statements with application level confirmations.
93 DisputeSendingV1,
94
95 /// Protocol for requesting candidates with attestations in statement distribution
96 /// when async backing is enabled.
97 AttestedCandidateV2,
98
99 /// Protocol for chunk fetching version 2, used by availability distribution and availability
100 /// recovery.
101 ChunkFetchingV2,
102}
103
104/// Minimum bandwidth we expect for validators - 500Mbit/s is the recommendation, so approximately
105/// 50MB per second:
106const MIN_BANDWIDTH_BYTES: u64 = 50 * 1024 * 1024;
107
108/// Default request timeout in seconds.
109///
110/// When decreasing this value, take into account that the very first request might need to open a
111/// connection, which can be slow. If this causes problems, we should ensure connectivity via peer
112/// sets.
113#[allow(dead_code)]
114const DEFAULT_REQUEST_TIMEOUT: Duration = Duration::from_secs(3);
115
116/// Request timeout where we can assume the connection is already open (e.g. we have peers in a
117/// peer set as well).
118const DEFAULT_REQUEST_TIMEOUT_CONNECTED: Duration = Duration::from_secs(1);
119
120/// Timeout for requesting availability chunks.
121pub const CHUNK_REQUEST_TIMEOUT: Duration = DEFAULT_REQUEST_TIMEOUT_CONNECTED;
122
123/// This timeout is based on the following parameters, assuming we use asynchronous backing with no
124/// time budget within a relay block:
125/// - 500 Mbit/s networking speed
126/// - 10 MB PoV
127/// - 10 parallel executions
128const POV_REQUEST_TIMEOUT_CONNECTED: Duration = Duration::from_millis(2000);
129
130/// We want attested candidate requests to time out relatively fast,
131/// because slow requests will bottleneck the backing system. Ideally, we'd have
132/// an adaptive timeout based on the candidate size, because there will be a lot of variance
133/// in candidate sizes: candidates with no code and no messages vs candidates with code
134/// and messages.
135///
136/// We supply leniency because there are often large candidates and asynchronous
137/// backing allows them to be included over a longer window of time. Exponential back-off
138/// up to a maximum of 10 seconds would be ideal, but isn't supported by the
139/// infrastructure here yet: see https://github.com/paritytech/polkadot/issues/6009
140const ATTESTED_CANDIDATE_TIMEOUT: Duration = Duration::from_millis(2500);
141
142/// We don't want a slow peer to slow down all the others, at the same time we want to get out the
143/// data quickly in full to at least some peers (as this will reduce load on us as they then can
144/// start serving the data). So this value is a tradeoff. 5 seems to be sensible. So we would need
145/// to have 5 slow nodes connected, to delay transfer for others by `ATTESTED_CANDIDATE_TIMEOUT`.
146pub const MAX_PARALLEL_ATTESTED_CANDIDATE_REQUESTS: u32 = 5;
147
148/// Response size limit for responses of POV like data.
149///
150/// Same as what we use in substrate networking.
151const POV_RESPONSE_SIZE: u64 = MAX_RESPONSE_SIZE;
152
153/// Maximum response sizes for `AttestedCandidateV2`.
154///
155/// Chosen as a safe upper bound above the governance ceiling on validation code size
156/// (`polkadot_primitives::MAX_CODE_SIZE`), leaving headroom for backing statements and
157/// protocol overhead. This is a transport-level DoS cap only; the effective policy is
158/// enforced by the runtime and the node-side inclusion emulator against the on-chain
159/// `HostConfiguration.max_code_size`.
160const ATTESTED_CANDIDATE_RESPONSE_SIZE: u64 = 8 * 1024 * 1024;
161
162/// We can have relative large timeouts here, there is no value of hitting a
163/// timeout as we want to get statements through to each node in any case.
164pub const DISPUTE_REQUEST_TIMEOUT: Duration = Duration::from_secs(12);
165
166impl Protocol {
167 /// Get a configuration for a given Request response protocol.
168 ///
169 /// Returns a `ProtocolConfig` for this protocol.
170 /// Use this if you plan only to send requests for this protocol.
171 pub fn get_outbound_only_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
172 self,
173 req_protocol_names: &ReqProtocolNames,
174 ) -> N::RequestResponseProtocolConfig {
175 self.create_config::<B, N>(req_protocol_names, None)
176 }
177
178 /// Get a configuration for a given Request response protocol.
179 ///
180 /// Returns a receiver for messages received on this protocol and the requested
181 /// `ProtocolConfig`.
182 pub fn get_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
183 self,
184 req_protocol_names: &ReqProtocolNames,
185 ) -> (async_channel::Receiver<network::IncomingRequest>, N::RequestResponseProtocolConfig) {
186 let (tx, rx) = async_channel::bounded(self.get_channel_size());
187 let cfg = self.create_config::<B, N>(req_protocol_names, Some(tx));
188 (rx, cfg)
189 }
190
191 fn create_config<B: Block, N: NetworkBackend<B, <B as Block>::Hash>>(
192 self,
193 req_protocol_names: &ReqProtocolNames,
194 tx: Option<async_channel::Sender<network::IncomingRequest>>,
195 ) -> N::RequestResponseProtocolConfig {
196 let name = req_protocol_names.get_name(self);
197 let legacy_names = self.get_legacy_name().into_iter().map(Into::into).collect();
198 match self {
199 Protocol::ChunkFetchingV1 | Protocol::ChunkFetchingV2 => N::request_response_config(
200 name,
201 legacy_names,
202 1_000,
203 POV_RESPONSE_SIZE,
204 // We are connected to all validators:
205 CHUNK_REQUEST_TIMEOUT,
206 tx,
207 ),
208 Protocol::CollationFetchingV1 | Protocol::CollationFetchingV2 => {
209 N::request_response_config(
210 name,
211 legacy_names,
212 1_000,
213 POV_RESPONSE_SIZE,
214 // Taken from initial implementation in collator protocol:
215 POV_REQUEST_TIMEOUT_CONNECTED,
216 tx,
217 )
218 },
219 Protocol::PoVFetchingV1 => N::request_response_config(
220 name,
221 legacy_names,
222 1_000,
223 POV_RESPONSE_SIZE,
224 POV_REQUEST_TIMEOUT_CONNECTED,
225 tx,
226 ),
227 Protocol::AvailableDataFetchingV1 => N::request_response_config(
228 name,
229 legacy_names,
230 1_000,
231 // Available data size is dominated by the PoV size.
232 POV_RESPONSE_SIZE,
233 POV_REQUEST_TIMEOUT_CONNECTED,
234 tx,
235 ),
236 Protocol::DisputeSendingV1 => N::request_response_config(
237 name,
238 legacy_names,
239 1_000,
240 // Responses are just confirmation, in essence not even a bit. So 100 seems
241 // plenty.
242 100,
243 DISPUTE_REQUEST_TIMEOUT,
244 tx,
245 ),
246 Protocol::AttestedCandidateV2 => N::request_response_config(
247 name,
248 legacy_names,
249 1_000,
250 ATTESTED_CANDIDATE_RESPONSE_SIZE,
251 ATTESTED_CANDIDATE_TIMEOUT,
252 tx,
253 ),
254 }
255 }
256
257 // Channel sizes for the supported protocols.
258 fn get_channel_size(self) -> usize {
259 match self {
260 // Hundreds of validators will start requesting their chunks once they see a candidate
261 // awaiting availability on chain. Given that they will see that block at different
262 // times (due to network delays), 100 seems big enough to accommodate for "bursts",
263 // assuming we can service requests relatively quickly, which would need to be measured
264 // as well.
265 Protocol::ChunkFetchingV1 | Protocol::ChunkFetchingV2 => 100,
266 // 10 seems reasonable, considering group sizes of max 10 validators.
267 Protocol::CollationFetchingV1 | Protocol::CollationFetchingV2 => 10,
268 // 10 seems reasonable, considering group sizes of max 10 validators.
269 Protocol::PoVFetchingV1 => 10,
270 // Validators are constantly self-selecting to request available data which may lead
271 // to constant load and occasional burstiness.
272 Protocol::AvailableDataFetchingV1 => 100,
273 // Incoming requests can get bursty, we should also be able to handle them fast on
274 // average, so something in the ballpark of 100 should be fine. Nodes will retry on
275 // failure, so having a good value here is mostly about performance tuning.
276 Protocol::DisputeSendingV1 => 100,
277
278 Protocol::AttestedCandidateV2 => {
279 // We assume we can utilize up to 70% of the available bandwidth for statements.
280 // This is just a guess/estimate, with the following considerations: If we are
281 // faster than that, queue size will stay low anyway, even if not - requesters will
282 // get an immediate error, but if we are slower, requesters will run in a timeout -
283 // wasting precious time.
284 let available_bandwidth = 7 * MIN_BANDWIDTH_BYTES / 10;
285 let size = u64::saturating_sub(
286 ATTESTED_CANDIDATE_TIMEOUT.as_millis() as u64 * available_bandwidth /
287 (1000 * ATTESTED_CANDIDATE_RESPONSE_SIZE),
288 MAX_PARALLEL_ATTESTED_CANDIDATE_REQUESTS as u64,
289 );
290 debug_assert!(
291 size > 0,
292 "We should have a channel size greater zero, otherwise we won't accept any requests."
293 );
294 size as usize
295 },
296 }
297 }
298
299 /// Legacy protocol name associated with each peer set, if any.
300 /// The request will be tried on this legacy protocol name if the remote refuses to speak the
301 /// protocol.
302 const fn get_legacy_name(self) -> Option<&'static str> {
303 match self {
304 Protocol::ChunkFetchingV1 => Some("/polkadot/req_chunk/1"),
305 Protocol::CollationFetchingV1 => Some("/polkadot/req_collation/1"),
306 Protocol::PoVFetchingV1 => Some("/polkadot/req_pov/1"),
307 Protocol::AvailableDataFetchingV1 => Some("/polkadot/req_available_data/1"),
308 Protocol::DisputeSendingV1 => Some("/polkadot/send_dispute/1"),
309
310 // Introduced after legacy names became legacy.
311 Protocol::AttestedCandidateV2 => None,
312 Protocol::CollationFetchingV2 => None,
313 Protocol::ChunkFetchingV2 => None,
314 }
315 }
316}
317
318/// Common properties of any `Request`.
319pub trait IsRequest {
320 /// Each request has a corresponding `Response`.
321 type Response;
322
323 /// What protocol this `Request` implements.
324 const PROTOCOL: Protocol;
325}
326
327/// Type for getting on the wire [`Protocol`] names using genesis hash & fork id.
328#[derive(Clone)]
329pub struct ReqProtocolNames {
330 names: HashMap<Protocol, ProtocolName>,
331}
332
333impl ReqProtocolNames {
334 /// Construct [`ReqProtocolNames`] from `genesis_hash` and `fork_id`.
335 pub fn new<Hash: AsRef<[u8]>>(genesis_hash: Hash, fork_id: Option<&str>) -> Self {
336 let mut names = HashMap::new();
337 for protocol in Protocol::iter() {
338 names.insert(protocol, Self::generate_name(protocol, &genesis_hash, fork_id));
339 }
340 Self { names }
341 }
342
343 /// Get on the wire [`Protocol`] name.
344 pub fn get_name(&self, protocol: Protocol) -> ProtocolName {
345 self.names
346 .get(&protocol)
347 .expect("All `Protocol` enum variants are added above via `strum`; qed")
348 .clone()
349 }
350
351 /// Protocol name of this protocol based on `genesis_hash` and `fork_id`.
352 fn generate_name<Hash: AsRef<[u8]>>(
353 protocol: Protocol,
354 genesis_hash: &Hash,
355 fork_id: Option<&str>,
356 ) -> ProtocolName {
357 let prefix = if let Some(fork_id) = fork_id {
358 format!("/{}/{}", hex::encode(genesis_hash), fork_id)
359 } else {
360 format!("/{}", hex::encode(genesis_hash))
361 };
362
363 let short_name = match protocol {
364 // V1:
365 Protocol::ChunkFetchingV1 => "/req_chunk/1",
366 Protocol::CollationFetchingV1 => "/req_collation/1",
367 Protocol::PoVFetchingV1 => "/req_pov/1",
368 Protocol::AvailableDataFetchingV1 => "/req_available_data/1",
369 Protocol::DisputeSendingV1 => "/send_dispute/1",
370
371 // V2:
372 Protocol::CollationFetchingV2 => "/req_collation/2",
373 Protocol::AttestedCandidateV2 => "/req_attested_candidate/2",
374 Protocol::ChunkFetchingV2 => "/req_chunk/2",
375 };
376
377 format!("{}{}", prefix, short_name).into()
378 }
379}