try_runtime_core/commands/on_runtime_upgrade/
mod.rs

// This file is part of try-runtime-cli.

// Copyright (C) Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: Apache-2.0

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod mbms;

use std::{collections::BTreeMap, fmt::Debug, str::FromStr};

use bytesize::ByteSize;
use frame_remote_externalities::RemoteExternalities;
use frame_try_runtime::UpgradeCheckSelect;
use log::Level;
use parity_scale_codec::Encode;
use sc_executor::sp_wasm_interface::HostFunctions;
use sp_core::{hexdisplay::HexDisplay, twox_128, Hasher, H256};
use sp_runtime::{
    traits::{Block as BlockT, HashingFor, NumberFor},
    DeserializeOwned,
};
use sp_state_machine::{CompactProof, OverlayedChanges, StorageProof};

use crate::{
    commands::on_runtime_upgrade::mbms::MbmChecker,
    common::{
        misc_logging::{basti_log, LogLevelGuard},
        state::{build_executor, state_machine_call_with_proof, RuntimeChecks, State},
    },
    RefTimeInfo, SharedParams, LOG_TARGET,
};

/// Configuration for the `on-runtime-upgrade` command.
///
/// The parameters below control the behavior of the runtime upgrade checks, e.g. whether to
/// disable weight warnings or whether to run multi-block migration checks.
#[derive(Debug, Clone, clap::Parser)]
pub struct Command {
    /// The source of the blockchain state to use when running the `on-runtime-upgrade` command.
    #[command(subcommand)]
    pub state: State,

    /// Select which optional checks to perform. Selects all when no value is given.
    ///
    /// - `none`: Perform no checks.
    /// - `all`: Perform all checks (default when --checks is present with no value).
    /// - `pre-and-post`: Perform pre- and post-upgrade checks (default when the arg is not
    ///   present).
    /// - `try-state`: Perform the try-state checks.
    ///
    /// Performing any checks will potentially invalidate the measured PoV/Weight.
    // NOTE: The clap attributes make it backwards compatible with the previous `--checks` flag.
    #[clap(
        long,
        default_value = "pre-and-post",
        default_missing_value = "all",
        num_args = 0..=1,
        verbatim_doc_comment
    )]
    pub checks: UpgradeCheckSelect,

    /// Whether to disable weight warnings. Useful if a relay chain's runtime is being tested.
    #[clap(long, default_value = "false", default_missing_value = "true")]
    pub no_weight_warnings: bool,

    /// Whether to skip enforcing that the new runtime `spec_version` is greater than or equal
    /// to the existing `spec_version`.
    #[clap(long, default_value = "false", default_missing_value = "true")]
    pub disable_spec_version_check: bool,

    /// Whether to disable migration idempotency checks.
    #[clap(long, default_value = "false", default_missing_value = "true")]
    pub disable_idempotency_checks: bool,

    /// When migrations are detected as not idempotent, enabling this will output a diff of the
    /// storage before and after running the same set of migrations a second time.
    #[clap(long, default_value = "false", default_missing_value = "true")]
    pub print_storage_diff: bool,

    /// Whether to disable the checks that execute multi-block migrations to completion after
    /// the single-block migrations are completed.
    #[clap(long, default_value = "false", default_missing_value = "true")]
    pub disable_mbm_checks: bool,

    /// The maximum duration, in blocks, that all MBMs combined are expected to take.
    ///
    /// This value ensures the CLI won't run indefinitely in case of a buggy MBM.
    #[clap(long, default_value = "600")]
    pub mbm_max_blocks: u32,

    /// The chain blocktime, in milliseconds.
    #[arg(long)]
    pub blocktime: u64,
}
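
// Example invocation (illustrative only; the exact flags, subcommands, and state sources
// depend on the installed try-runtime-cli version):
//
//   try-runtime --runtime ./runtime.compact.compressed.wasm \
//       on-runtime-upgrade --checks=pre-and-post --blocktime 6000 \
//       live --uri wss://rpc.example.com:443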

/// Convenience struct to hold all the generic args and where clauses.
pub(crate) struct CheckOnRuntimeUpgrade<Block, HostFns> {
    pub shared: SharedParams,
    pub command: Command,
    pub _phantom: std::marker::PhantomData<(Block, HostFns)>,
}

impl<Block: BlockT<Hash = H256> + DeserializeOwned, HostFns> CheckOnRuntimeUpgrade<Block, HostFns>
where
    Block: BlockT + serde::de::DeserializeOwned,
    <Block::Hash as FromStr>::Err: Debug,
    Block::Header: serde::de::DeserializeOwned,
    NumberFor<Block>: FromStr,
    <NumberFor<Block> as FromStr>::Err: Debug,
    HostFns: HostFunctions,
{
    /// Runs the `on-runtime-upgrade` command.
    pub async fn run(&self) -> sc_cli::Result<()> {
        let shared = &self.shared;
        let command = &self.command;

        let executor = build_executor(shared);
        let runtime_checks = RuntimeChecks {
            name_matches: !shared.disable_spec_name_check,
            version_increases: !command.disable_spec_version_check,
            try_runtime_feature_enabled: true,
        };
        let mut ext = command
            .state
            .to_ext::<Block, HostFns>(shared, &executor, None, runtime_checks)
            .await?;

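        // When MBM checks are enabled, the user-selected checks are exercised as part of the
        // MBM flow (see `check_mbms`), so this synchronous call runs without them.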
        let sync_checks = if command.disable_mbm_checks {
            command.checks
        } else {
            UpgradeCheckSelect::None
        };

        // Run `TryRuntime_on_runtime_upgrade` with the given checks.
        basti_log(
            Level::Info,
            format!(
                "🔬 Running TryRuntime_on_runtime_upgrade with checks: {:?}",
                sync_checks
            )
            .as_str(),
        );

        // Check that the Single-Block-Migrations work:
        let mut overlayed_changes = Default::default();
        let _ = state_machine_call_with_proof::<Block, HostFns>(
            &ext,
            &mut overlayed_changes,
            &executor,
            "TryRuntime_on_runtime_upgrade",
            sync_checks.encode().as_ref(),
            Default::default(), // we don't really need any extensions here.
            shared.export_proof.clone(),
        )?;

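        // The overlayed changes from this first run are not committed to the backend; they are
        // reused below to verify that running the migrations a second time changes nothing.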
        let idempotency_ok = self.check_idempotency(&mut ext, &overlayed_changes)?;
        let weight_ok = self.check_weight(&ext)?;

        self.check_mbms(runtime_checks).await?;

        if !weight_ok || !idempotency_ok {
            return Err("Runtime Upgrade issues detected, exiting non-zero. See logs.".into());
        }

        Ok(())
    }

    /// Check that the migrations are idempotent.
    ///
    /// Expects the overlayed changes from the first execution of the migrations.
    fn check_idempotency(
        &self,
        ext: &mut RemoteExternalities<Block>,
        changes: &OverlayedChanges<HashingFor<Block>>,
    ) -> sc_cli::Result<bool> {
        if !self.command.disable_idempotency_checks {
            basti_log(
                Level::Info,
                format!(
                    "🔬 Running TryRuntime_on_runtime_upgrade again to check idempotency: {:?}",
                    self.command.checks
                )
                .as_str(),
            );
            let executor = build_executor(&self.shared);

            let before = changes.clone();
            let mut after = changes.clone();

            // The MBM pallet refuses to interrupt ongoing MBMs, so we need to pretend that it did
            // not run yet. We cannot just use a previous state since the single-block-migrations
            // would not be tested for idempotency.
            // TODO add switch and guessing logic for the MBM pallet name.
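            // The key below is the standard FRAME storage key for `MultiBlockMigrations::Cursor`:
            // twox-128 of the pallet prefix concatenated with twox-128 of the item name.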
            let key = [twox_128(b"MultiBlockMigrations"), twox_128(b"Cursor")].concat();
            after.clear_prefix(&key);

            // Don't print all logs again.
            // let _quiet = LogLevelGuard::only_errors();
            match state_machine_call_with_proof::<Block, HostFns>(
                ext,
                &mut after,
                &executor,
                "TryRuntime_on_runtime_upgrade",
                UpgradeCheckSelect::None.encode().as_ref(),
                Default::default(),
                self.shared.export_proof.clone(),
            ) {
                Ok(_) => {
                    if self.changed(ext, before, after)? {
                        log::error!("❌ Migrations must behave the same when executed twice. This was not the case as a storage root hash mismatch was detected. Remove migrations one-by-one and re-run until you find the culprit.");
                        Ok(false)
                    } else {
                        log::info!("✅ Migrations are idempotent");
                        Ok(true)
                    }
                }
                Err(e) => {
                    log::error!(
                        "❌ Migrations are not idempotent, they failed during the second execution.",
                    );
                    log::debug!("{:?}", e);
                    Ok(false)
                }
            }
        } else {
            log::info!("ℹ Skipping idempotency check");
            Ok(true)
        }
    }

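    /// Run the Multi-Block-Migration (MBM) checks, unless they are disabled via the CLI.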
    async fn check_mbms(&self, runtime_checks: RuntimeChecks) -> sc_cli::Result<()> {
        if self.command.disable_mbm_checks {
            log::info!("ℹ Skipping Multi-Block-Migrations");
            return Ok(());
        }

        let checker = MbmChecker::<Block, HostFns> {
            command: self.command.clone(),
            shared: self.shared.clone(),
            runtime_checks,
            _phantom: Default::default(),
        };

        checker.check_mbms().await
    }

    /// Check that the migrations don't use more weight than a block.
    fn check_weight(&self, ext: &RemoteExternalities<Block>) -> sc_cli::Result<bool> {
        if self.command.no_weight_warnings {
            log::info!("ℹ Skipping weight safety check");
            return Ok(true);
        }
        basti_log(
            Level::Info,
            "🔬 TryRuntime_on_runtime_upgrade succeeded! Running it again for weight measurements.",
        );

        let executor = build_executor(&self.shared);
        let _quiet = LogLevelGuard::only_errors();
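        // Execute the migrations once more with all checks disabled so that the measured
        // ref_time and PoV are not inflated by try-runtime-only checks.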
        let (proof, encoded_result) = state_machine_call_with_proof::<Block, HostFns>(
            ext,
            &mut Default::default(),
            &executor,
            "TryRuntime_on_runtime_upgrade",
            UpgradeCheckSelect::None.encode().as_ref(),
            Default::default(),
            self.shared.export_proof.clone(),
        )?;
        let ref_time_results = encoded_result.try_into()?;
        drop(_quiet);

        let pre_root = ext.backend.root();
        let pov_safety = analyse_pov::<HashingFor<Block>>(proof, *pre_root);
        let ref_time_safety = analyse_ref_time(ref_time_results);

        match (pov_safety, ref_time_safety) {
            (WeightSafety::ProbablySafe, WeightSafety::ProbablySafe) => {
                log::info!(
                    target: LOG_TARGET,
                    "✅ No weight safety issues detected. \
                    Please note this does not guarantee a successful runtime upgrade. \
                    Always test your runtime upgrade with recent state, and ensure that the weight usage \
                    of your migrations will not drastically differ between testing and actual on-chain \
                    execution."
                );
                Ok(true)
            }
            _ => {
                log::error!(target: LOG_TARGET, "❌ Weight safety issues detected.");
                Ok(false)
            }
        }
    }

    /// Whether any storage was changed.
    fn changed(
        &self,
        ext: &RemoteExternalities<Block>,
        mut before: OverlayedChanges<HashingFor<Block>>,
        mut after: OverlayedChanges<HashingFor<Block>>,
    ) -> sc_cli::Result<bool> {
        // Events are fine to not be idempotent.
        let key = [twox_128(b"System"), twox_128(b"Events")].concat();
        after.clear_prefix(&key);
        before.clear_prefix(&key);
        let key = [twox_128(b"System"), twox_128(b"EventCount")].concat();
        after.clear_prefix(&key);
        before.clear_prefix(&key);

        let (root_before, _) = before.storage_root(&ext.backend, ext.state_version);
        let (root_after, _) = after.storage_root(&ext.backend, ext.state_version);

        log::info!(
            "Storage root before: 0x{}, after: 0x{}",
            hex::encode(root_before),
            hex::encode(root_after),
        );

        if root_before == root_after {
            return Ok(false);
        }

        if self.command.print_storage_diff {
            log::info!("Changed storage keys:");
            let changes_before = collect_storage_changes_as_hex::<Block>(&before);
            let changes_after = collect_storage_changes_as_hex::<Block>(&after);

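            // `similar_asserts::assert_eq!` panics with a readable diff when the two change
            // sets differ, printing the offending keys and values to the console.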
            similar_asserts::assert_eq!(changes_before, changes_after);
            Err("Storage changes detected: migrations not idempotent".into())
        } else {
            log::error!("Run with --print-storage-diff to see a list of changed storage keys.");
            Ok(true)
        }
    }
}

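/// Outcome of a heuristic weight/PoV safety analysis.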
enum WeightSafety {
    ProbablySafe,
    PotentiallyUnsafe,
}

/// The default maximum PoV size in MB.
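///
/// Note that `ByteSize::mb` is decimal (5,000,000 bytes), slightly below the 5 MiB PoV limit
/// commonly enforced by relay chains, so this threshold is marginally conservative.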
const DEFAULT_MAX_POV_SIZE: ByteSize = ByteSize::mb(5);

/// The fraction of the total available ref_time or PoV size after which a warning should be logged.
const DEFAULT_WARNING_THRESHOLD: f32 = 0.8;

/// Analyse the given storage proof (PoV) and return whether there is a potential weight safety issue.
fn analyse_pov<H>(proof: StorageProof, pre_root: H::Out) -> WeightSafety
where
    H: Hasher,
{
    if proof.is_empty() {
        log::info!(target: LOG_TARGET, "Empty PoV detected");
        return WeightSafety::ProbablySafe;
    }

    let encoded_proof_size = proof.encoded_size();
    let compact_proof = proof
        .clone()
        .into_compact_proof::<H>(pre_root)
        .map_err(|e| {
            log::error!(target: LOG_TARGET, "failed to generate compact proof: {:?}", e);
            e
        })
        .unwrap_or(CompactProof {
            encoded_nodes: Default::default(),
        });

    let compact_proof_size = compact_proof.encoded_size();
    let compressed_compact_proof = zstd::stream::encode_all(&compact_proof.encode()[..], 0)
        .map_err(|e| {
            log::error!(
                target: LOG_TARGET,
                "failed to generate compressed proof: {:?}",
                e
            );
            e
        })
        .expect("generating compressed proof should never fail if proof is valid");

    let proof_nodes = proof.into_nodes();
    log::debug!(
        target: LOG_TARGET,
        "Proof: 0x{}... / {} nodes",
        HexDisplay::from(&proof_nodes.iter().flatten().cloned().take(10).collect::<Vec<_>>()),
        proof_nodes.len()
    );
    log::debug!(target: LOG_TARGET, "Encoded proof size: {}", ByteSize(encoded_proof_size as u64));
    log::debug!(target: LOG_TARGET, "Compact proof size: {}", ByteSize(compact_proof_size as u64));
    log::info!(
        target: LOG_TARGET,
        "PoV size (zstd-compressed compact proof): {}. For parachains, it's your responsibility \
        to verify that a PoV of this size fits within any relay chain constraints.",
        ByteSize(compressed_compact_proof.len() as u64),
    );
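    // The zstd-compressed compact proof is used for the check since it best approximates the
    // PoV size that a collator would actually submit to the relay chain.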
    if compressed_compact_proof.len() as f32
        > DEFAULT_MAX_POV_SIZE.as_u64() as f32 * DEFAULT_WARNING_THRESHOLD
    {
        log::warn!(
            target: LOG_TARGET,
            "A PoV size of {} is significant. Most relay chains usually accept PoVs up to {}. \
            Proceed with caution.",
            ByteSize(compressed_compact_proof.len() as u64),
            DEFAULT_MAX_POV_SIZE,
        );
        WeightSafety::PotentiallyUnsafe
    } else {
        WeightSafety::ProbablySafe
    }
}

/// Analyse the given ref_times and return whether there is a potential weight safety issue.
fn analyse_ref_time(ref_time_results: RefTimeInfo) -> WeightSafety {
    let RefTimeInfo { used, max } = ref_time_results;
    let (used, max) = (used.as_secs_f32(), max.as_secs_f32());
    log::info!(
        target: LOG_TARGET,
        "Consumed ref_time: {}s ({:.2}% of max {}s)",
        used,
        used / max * 100.0,
        max,
    );
    if used >= max * DEFAULT_WARNING_THRESHOLD {
        log::warn!(
            target: LOG_TARGET,
            "Consumed ref_time is >= {}% of the max allowed ref_time. Please ensure the \
            migration is not too computationally expensive to fit in a single block.",
            DEFAULT_WARNING_THRESHOLD * 100.0,
        );
        WeightSafety::PotentiallyUnsafe
    } else {
        WeightSafety::ProbablySafe
    }
}

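/// Collect the storage changes from the overlay as hex-encoded key/value pairs, representing
/// deleted values as `"<deleted>"`.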
fn collect_storage_changes_as_hex<Block: BlockT>(
    overlayed_changes: &OverlayedChanges<HashingFor<Block>>,
) -> BTreeMap<String, String> {
    overlayed_changes
        .changes()
        .map(|(key, entry)| {
            (
                HexDisplay::from(key).to_string(),
                entry
                    .clone()
                    .value()
                    .map_or_else(|| "<deleted>".to_string(), hex::encode),
            )
        })
        .collect()
}