referrerpolicy=no-referrer-when-downgrade

node_bench/
trie.rs

1// This file is part of Substrate.
2
3// Copyright (C) Parity Technologies (UK) Ltd.
4// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
5
6// This program is free software: you can redistribute it and/or modify
7// it under the terms of the GNU General Public License as published by
8// the Free Software Foundation, either version 3 of the License, or
9// (at your option) any later version.
10
11// This program is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// You should have received a copy of the GNU General Public License
17// along with this program. If not, see <https://www.gnu.org/licenses/>.
18
19//! Trie benchmark (integrated).
20
21use hash_db::Prefix;
22use kvdb::KeyValueDB;
23use rand::Rng;
24use sp_state_machine::Backend as _;
25use sp_trie::{trie_types::TrieDBMutBuilderV1, TrieMut as _};
26use std::{
27	borrow::Cow,
28	collections::HashMap,
29	sync::{Arc, LazyLock},
30};
31
32use node_primitives::Hash;
33
34use crate::{
35	core::{self, Mode, Path},
36	generator::generate_trie,
37	simple_trie::SimpleTrie,
38	tempdb::{DatabaseType, TempDatabase},
39};
40
41pub const SAMPLE_SIZE: usize = 100;
42pub const TEST_WRITE_SIZE: usize = 128;
43
44pub type KeyValue = (Vec<u8>, Vec<u8>);
45pub type KeyValues = Vec<KeyValue>;
46
47#[derive(Clone, Copy, Debug, derive_more::Display)]
48pub enum DatabaseSize {
49	#[display(fmt = "empty")]
50	Empty,
51	#[display(fmt = "smallest")]
52	Smallest,
53	#[display(fmt = "small")]
54	Small,
55	#[display(fmt = "medium")]
56	Medium,
57	#[display(fmt = "large")]
58	Large,
59	#[display(fmt = "huge")]
60	Huge,
61}
62
63static KUSAMA_STATE_DISTRIBUTION: LazyLock<SizePool> =
64	LazyLock::new(|| SizePool::from_histogram(crate::state_sizes::KUSAMA_STATE_DISTRIBUTION));
65
66impl DatabaseSize {
67	/// Should be multiple of SAMPLE_SIZE!
68	fn keys(&self) -> usize {
69		let val = match *self {
70			Self::Empty => 200, // still need some keys to query
71			Self::Smallest => 1_000,
72			Self::Small => 10_000,
73			Self::Medium => 100_000,
74			Self::Large => 200_000,
75			Self::Huge => 1_000_000,
76		};
77
78		assert_eq!(val % SAMPLE_SIZE, 0);
79
80		val
81	}
82}
83
84fn pretty_print(v: usize) -> String {
85	let mut print = String::new();
86	for (idx, val) in v.to_string().chars().rev().enumerate() {
87		if idx != 0 && idx % 3 == 0 {
88			print.insert(0, ',');
89		}
90		print.insert(0, val);
91	}
92	print
93}
94
95pub struct TrieReadBenchmarkDescription {
96	pub database_size: DatabaseSize,
97	pub database_type: DatabaseType,
98}
99
100pub struct TrieReadBenchmark {
101	database: TempDatabase,
102	root: Hash,
103	warmup_keys: KeyValues,
104	query_keys: KeyValues,
105	database_type: DatabaseType,
106}
107
108impl core::BenchmarkDescription for TrieReadBenchmarkDescription {
109	fn path(&self) -> Path {
110		let mut path = Path::new(&["trie", "read"]);
111		path.push(&format!("{}", self.database_size));
112		path
113	}
114
115	fn setup(self: Box<Self>) -> Box<dyn core::Benchmark> {
116		let mut database = TempDatabase::new();
117
118		let mut rng = rand::thread_rng();
119		let warmup_prefix = KUSAMA_STATE_DISTRIBUTION.key(&mut rng);
120
121		let mut key_values = KeyValues::new();
122		let mut warmup_keys = KeyValues::new();
123		let mut query_keys = KeyValues::new();
124		let every_x_key = self.database_size.keys() / SAMPLE_SIZE;
125		for idx in 0..self.database_size.keys() {
126			let kv = (
127				KUSAMA_STATE_DISTRIBUTION.key(&mut rng).to_vec(),
128				KUSAMA_STATE_DISTRIBUTION.value(&mut rng),
129			);
130			if idx % every_x_key == 0 {
131				// warmup keys go to separate tree with high prob
132				let mut actual_warmup_key = warmup_prefix.clone();
133				actual_warmup_key[16..].copy_from_slice(&kv.0[16..]);
134				warmup_keys.push((actual_warmup_key.clone(), kv.1.clone()));
135				key_values.push((actual_warmup_key.clone(), kv.1.clone()));
136			} else if idx % every_x_key == 1 {
137				query_keys.push(kv.clone());
138			}
139
140			key_values.push(kv)
141		}
142
143		assert_eq!(warmup_keys.len(), SAMPLE_SIZE);
144		assert_eq!(query_keys.len(), SAMPLE_SIZE);
145
146		let root = generate_trie(database.open(self.database_type), key_values);
147
148		Box::new(TrieReadBenchmark {
149			database,
150			root,
151			warmup_keys,
152			query_keys,
153			database_type: self.database_type,
154		})
155	}
156
157	fn name(&self) -> Cow<'static, str> {
158		format!(
159			"Trie read benchmark({:?} database ({} keys), db_type: {:?})",
160			self.database_size,
161			pretty_print(self.database_size.keys()),
162			self.database_type,
163		)
164		.into()
165	}
166}
167
168struct Storage(Arc<dyn KeyValueDB>);
169
170impl sp_state_machine::Storage<sp_core::Blake2Hasher> for Storage {
171	fn get(&self, key: &Hash, prefix: Prefix) -> Result<Option<Vec<u8>>, String> {
172		let key = sp_trie::prefixed_key::<sp_core::Blake2Hasher>(key, prefix);
173		self.0.get(0, &key).map_err(|e| format!("Database backend error: {:?}", e))
174	}
175}
176
177impl core::Benchmark for TrieReadBenchmark {
178	fn run(&mut self, mode: Mode) -> std::time::Duration {
179		let mut db = self.database.clone();
180
181		let storage: Arc<dyn sp_state_machine::Storage<sp_core::Blake2Hasher>> =
182			Arc::new(Storage(db.open(self.database_type)));
183
184		let trie_backend = sp_state_machine::TrieBackendBuilder::new(storage, self.root).build();
185		for (warmup_key, warmup_value) in self.warmup_keys.iter() {
186			let value = trie_backend
187				.storage(&warmup_key[..])
188				.expect("Failed to get key: db error")
189				.expect("Warmup key should exist");
190
191			// sanity for warmup keys
192			assert_eq!(&value, warmup_value);
193		}
194
195		if mode == Mode::Profile {
196			std::thread::park_timeout(std::time::Duration::from_secs(3));
197		}
198
199		let started = std::time::Instant::now();
200		for (key, _) in self.query_keys.iter() {
201			let _ = trie_backend.storage(&key[..]);
202		}
203		let elapsed = started.elapsed();
204
205		if mode == Mode::Profile {
206			std::thread::park_timeout(std::time::Duration::from_secs(1));
207		}
208
209		elapsed / (SAMPLE_SIZE as u32)
210	}
211}
212
213pub struct TrieWriteBenchmarkDescription {
214	pub database_size: DatabaseSize,
215	pub database_type: DatabaseType,
216}
217
218impl core::BenchmarkDescription for TrieWriteBenchmarkDescription {
219	fn path(&self) -> Path {
220		let mut path = Path::new(&["trie", "write"]);
221		path.push(&format!("{}", self.database_size));
222		path
223	}
224
225	fn setup(self: Box<Self>) -> Box<dyn core::Benchmark> {
226		let mut database = TempDatabase::new();
227
228		let mut rng = rand::thread_rng();
229		let warmup_prefix = KUSAMA_STATE_DISTRIBUTION.key(&mut rng);
230
231		let mut key_values = KeyValues::new();
232		let mut warmup_keys = KeyValues::new();
233		let every_x_key = self.database_size.keys() / SAMPLE_SIZE;
234		for idx in 0..self.database_size.keys() {
235			let kv = (
236				KUSAMA_STATE_DISTRIBUTION.key(&mut rng).to_vec(),
237				KUSAMA_STATE_DISTRIBUTION.value(&mut rng),
238			);
239			if idx % every_x_key == 0 {
240				// warmup keys go to separate tree with high prob
241				let mut actual_warmup_key = warmup_prefix.clone();
242				actual_warmup_key[16..].copy_from_slice(&kv.0[16..]);
243				warmup_keys.push((actual_warmup_key.clone(), kv.1.clone()));
244				key_values.push((actual_warmup_key.clone(), kv.1.clone()));
245			}
246
247			key_values.push(kv)
248		}
249
250		assert_eq!(warmup_keys.len(), SAMPLE_SIZE);
251
252		let root = generate_trie(database.open(self.database_type), key_values);
253
254		Box::new(TrieWriteBenchmark {
255			database,
256			root,
257			warmup_keys,
258			database_type: self.database_type,
259		})
260	}
261
262	fn name(&self) -> Cow<'static, str> {
263		format!(
264			"Trie write benchmark({:?} database ({} keys), db_type = {:?})",
265			self.database_size,
266			pretty_print(self.database_size.keys()),
267			self.database_type,
268		)
269		.into()
270	}
271}
272
273struct TrieWriteBenchmark {
274	database: TempDatabase,
275	root: Hash,
276	warmup_keys: KeyValues,
277	database_type: DatabaseType,
278}
279
280impl core::Benchmark for TrieWriteBenchmark {
281	fn run(&mut self, mode: Mode) -> std::time::Duration {
282		let mut rng = rand::thread_rng();
283		let mut db = self.database.clone();
284		let kvdb = db.open(self.database_type);
285
286		let mut new_root = self.root;
287
288		let mut overlay = HashMap::new();
289		let mut trie = SimpleTrie { db: kvdb.clone(), overlay: &mut overlay };
290		let mut trie_db_mut = TrieDBMutBuilderV1::from_existing(&mut trie, &mut new_root).build();
291
292		for (warmup_key, warmup_value) in self.warmup_keys.iter() {
293			let value = trie_db_mut
294				.get(&warmup_key[..])
295				.expect("Failed to get key: db error")
296				.expect("Warmup key should exist");
297
298			// sanity for warmup keys
299			assert_eq!(&value, warmup_value);
300		}
301
302		let test_key = random_vec(&mut rng, 32);
303		let test_val = random_vec(&mut rng, TEST_WRITE_SIZE);
304
305		if mode == Mode::Profile {
306			std::thread::park_timeout(std::time::Duration::from_secs(3));
307		}
308
309		let started = std::time::Instant::now();
310
311		trie_db_mut.insert(&test_key, &test_val).expect("Should be inserted ok");
312		trie_db_mut.commit();
313		drop(trie_db_mut);
314
315		let mut transaction = kvdb.transaction();
316		for (key, value) in overlay.into_iter() {
317			match value {
318				Some(value) => transaction.put(0, &key[..], &value[..]),
319				None => transaction.delete(0, &key[..]),
320			}
321		}
322		kvdb.write(transaction).expect("Failed to write transaction");
323
324		let elapsed = started.elapsed();
325
326		// sanity check
327		assert!(new_root != self.root);
328
329		if mode == Mode::Profile {
330			std::thread::park_timeout(std::time::Duration::from_secs(1));
331		}
332
333		elapsed
334	}
335}
336
337fn random_vec<R: Rng>(rng: &mut R, len: usize) -> Vec<u8> {
338	let mut val = vec![0u8; len];
339	rng.fill_bytes(&mut val[..]);
340	val
341}
342
343struct SizePool {
344	distribution: std::collections::BTreeMap<u32, u32>,
345	total: u32,
346}
347
348impl SizePool {
349	fn from_histogram(h: &[(u32, u32)]) -> SizePool {
350		let mut distribution = std::collections::BTreeMap::default();
351		let mut total = 0;
352		for (size, count) in h {
353			total += count;
354			distribution.insert(total, *size);
355		}
356		SizePool { distribution, total }
357	}
358
359	fn value<R: Rng>(&self, rng: &mut R) -> Vec<u8> {
360		let sr = (rng.next_u64() % self.total as u64) as u32;
361		let mut range = self
362			.distribution
363			.range((std::ops::Bound::Included(sr), std::ops::Bound::Unbounded));
364		let size = *range.next().unwrap().1 as usize;
365		random_vec(rng, size)
366	}
367
368	fn key<R: Rng>(&self, rng: &mut R) -> Vec<u8> {
369		random_vec(rng, 32)
370	}
371}