referrerpolicy=no-referrer-when-downgrade

sc_network_sync/strategy/
disconnected_peers.rs

1// This file is part of Substrate.
2
3// Copyright (C) Parity Technologies (UK) Ltd.
4// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
5
6// This program is free software: you can redistribute it and/or modify
7// it under the terms of the GNU General Public License as published by
8// the Free Software Foundation, either version 3 of the License, or
9// (at your option) any later version.
10
11// This program is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// You should have received a copy of the GNU General Public License
17// along with this program. If not, see <https://www.gnu.org/licenses/>.
18
19use crate::types::BadPeer;
20use sc_network::ReputationChange as Rep;
21use sc_network_types::PeerId;
22use schnellru::{ByLength, LruMap};
23
/// Log target attached to the debug messages emitted by the disconnected peers tracker.
const LOG_TARGET: &str = "sync::disconnected_peers";

/// The maximum number of disconnected peers to keep track of.
///
/// This is the length limit of the LRU map that holds the per-peer state.
///
/// When a peer disconnects, we must keep track of whether it was in the middle of a request.
/// The peer may disconnect because it cannot keep up with the number of requests
/// (i.e. not having enough resources available to handle the requests); or because it is
/// malicious.
const MAX_DISCONNECTED_PEERS_STATE: u32 = 512;

/// The base time, in seconds, for which we back off a peer that has disconnected with an
/// inflight request.
///
/// The backoff time is calculated as `num_disconnects * DISCONNECTED_PEER_BACKOFF_SECONDS`.
/// This is to prevent submitting a request to a peer that has disconnected because it could not
/// keep up with the number of requests.
///
/// The peer may disconnect due to the keep-alive timeout, however disconnections without
/// an inflight request are not tracked.
const DISCONNECTED_PEER_BACKOFF_SECONDS: u64 = 60;

/// Maximum number of disconnects with a request in flight before a peer is banned.
///
/// The peer is reported as soon as its disconnect count reaches this value.
const MAX_NUM_DISCONNECTS: u64 = 3;

/// Peer disconnected with a request in flight after backoffs.
///
/// The peer may be slow to respond to the request after backoffs, or it refuses to respond.
/// Report the peer and let the reputation system handle disconnecting the peer.
pub const REPUTATION_REPORT: Rep = Rep::new_fatal("Peer disconnected with inflight after backoffs");
51
/// Bookkeeping for a single peer that dropped its connection while a request was in flight.
#[derive(Debug)]
struct DisconnectedState {
	/// How many disconnects have been recorded for the peer so far.
	num_disconnects: u64,
	/// The instant at which the most recent disconnect was recorded.
	last_disconnect: std::time::Instant,
}

impl DisconnectedState {
	/// Build the state for a peer that has just disconnected for the first time.
	///
	/// The disconnect counter starts at one and the timestamp is taken right now.
	pub fn new() -> Self {
		let now = std::time::Instant::now();
		DisconnectedState { num_disconnects: 1, last_disconnect: now }
	}

	/// Record one more disconnect and refresh the timestamp of the last disconnect.
	///
	/// The counter saturates at `u64::MAX` instead of overflowing.
	pub fn increment(&mut self) {
		let bumped = self.num_disconnects.saturating_add(1);
		self.num_disconnects = bumped;
		self.last_disconnect = std::time::Instant::now();
	}

	/// The number of disconnects recorded so far.
	pub fn num_disconnects(&self) -> u64 {
		let Self { num_disconnects, .. } = self;
		*num_disconnects
	}

	/// The instant of the most recently recorded disconnect.
	pub fn last_disconnect(&self) -> std::time::Instant {
		let Self { last_disconnect, .. } = self;
		*last_disconnect
	}
}
83
84/// Tracks the state of disconnected peers with a request in flight.
85///
86/// This helps to prevent submitting requests to peers that have disconnected
87/// before responding to the request to offload the peer.
88pub struct DisconnectedPeers {
89	/// The state of disconnected peers.
90	disconnected_peers: LruMap<PeerId, DisconnectedState>,
91	/// Backoff duration in seconds.
92	backoff_seconds: u64,
93}
94
95impl DisconnectedPeers {
96	/// Create a new `DisconnectedPeers`.
97	pub fn new() -> Self {
98		Self {
99			disconnected_peers: LruMap::new(ByLength::new(MAX_DISCONNECTED_PEERS_STATE)),
100			backoff_seconds: DISCONNECTED_PEER_BACKOFF_SECONDS,
101		}
102	}
103
104	/// Insert a new peer to the persistent state if not seen before, or update the state if seen.
105	///
106	/// Returns true if the peer should be disconnected.
107	pub fn on_disconnect_during_request(&mut self, peer: PeerId) -> Option<BadPeer> {
108		if let Some(state) = self.disconnected_peers.get(&peer) {
109			state.increment();
110
111			let should_ban = state.num_disconnects() >= MAX_NUM_DISCONNECTS;
112			log::debug!(
113				target: LOG_TARGET,
114				"Disconnected known peer {peer} state: {state:?}, should ban: {should_ban}",
115			);
116
117			should_ban.then(|| {
118				// We can lose track of the peer state and let the banning mechanism handle
119				// the peer backoff.
120				//
121				// After the peer banning expires, if the peer continues to misbehave, it will be
122				// backed off again.
123				self.disconnected_peers.remove(&peer);
124				BadPeer(peer, REPUTATION_REPORT)
125			})
126		} else {
127			log::debug!(
128				target: LOG_TARGET,
129				"Added peer {peer} for the first time"
130			);
131			// First time we see this peer.
132			self.disconnected_peers.insert(peer, DisconnectedState::new());
133			None
134		}
135	}
136
137	/// Check if a peer is available for queries.
138	pub fn is_peer_available(&mut self, peer_id: &PeerId) -> bool {
139		let Some(state) = self.disconnected_peers.get(peer_id) else {
140			return true;
141		};
142
143		let elapsed = state.last_disconnect().elapsed();
144		if elapsed.as_secs() >= self.backoff_seconds * state.num_disconnects {
145			log::debug!(target: LOG_TARGET, "Peer {peer_id} is available for queries");
146			self.disconnected_peers.remove(peer_id);
147			true
148		} else {
149			log::debug!(target: LOG_TARGET,"Peer {peer_id} is backedoff");
150			false
151		}
152	}
153}
154
#[cfg(test)]
mod tests {
	use super::*;
	use std::time::Duration;

	/// A peer is backed off on every disconnect below the threshold and reported once the
	/// threshold is reached.
	#[test]
	fn test_disconnected_peer_state() {
		let mut tracker = DisconnectedPeers::new();
		let peer = PeerId::random();

		// A peer that was never recorded is not tracked, hence available.
		assert!(tracker.is_peer_available(&peer));

		// Every disconnect below the threshold only backs the peer off.
		for _ in 1..MAX_NUM_DISCONNECTS {
			let report = tracker.on_disconnect_during_request(peer);
			assert!(report.is_none());
			assert!(!tracker.is_peer_available(&peer));
		}

		// Reaching the threshold yields a report for the peer.
		let report = tracker.on_disconnect_during_request(peer);
		assert!(report.is_some());
		// Peer is supposed to get banned and disconnected.
		// The state ownership moves to the PeerStore.
		assert!(tracker.disconnected_peers.get(&peer).is_none());
	}

	/// A backed off peer becomes available again after the backoff time has elapsed.
	#[test]
	fn ensure_backoff_time() {
		const TEST_BACKOFF_SECONDS: u64 = 2;
		let single_slot = LruMap::new(ByLength::new(1));
		let mut tracker = DisconnectedPeers {
			disconnected_peers: single_slot,
			backoff_seconds: TEST_BACKOFF_SECONDS,
		};
		let peer = PeerId::random();

		assert!(tracker.on_disconnect_during_request(peer).is_none());
		assert!(!tracker.is_peer_available(&peer));

		// Sleep for slightly longer than the backoff time of a single disconnect.
		let wait = Duration::from_secs(TEST_BACKOFF_SECONDS + 1);
		std::thread::sleep(wait);

		assert!(tracker.is_peer_available(&peer));
	}
}