try_runtime_core/commands/on_runtime_upgrade/
mod.rs

1// This file is part of try-runtime-cli.
2
3// Copyright (C) Parity Technologies (UK) Ltd.
4// SPDX-License-Identifier: Apache-2.0
5
6// Licensed under the Apache License, Version 2.0 (the "License");
7// you may not use this file except in compliance with the License.
8// You may obtain a copy of the License at
9//
10// 	http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18pub mod mbms;
19
20use std::{collections::BTreeMap, fmt::Debug, str::FromStr};
21
22use bytesize::ByteSize;
23use frame_remote_externalities::RemoteExternalities;
24use frame_try_runtime::UpgradeCheckSelect;
25use log::Level;
26use parity_scale_codec::Encode;
27use sc_executor::sp_wasm_interface::HostFunctions;
28use sp_core::{hexdisplay::HexDisplay, Hasher, H256};
29use sp_crypto_hashing::twox_128;
30use sp_runtime::{
31    traits::{Block as BlockT, HashingFor, NumberFor},
32    DeserializeOwned,
33};
34use sp_state_machine::{CompactProof, OverlayedChanges, StorageProof};
35
36use crate::{
37    commands::on_runtime_upgrade::mbms::MbmChecker,
38    common::{
39        misc_logging::{basti_log, LogLevelGuard},
40        state::{build_executor, state_machine_call_with_proof, RuntimeChecks, State},
41    },
42    RefTimeInfo, SharedParams, LOG_TARGET,
43};
44
45/// Configuration for the `on_runtime_upgrade` command.
46///
47/// The parameters below control the behavior of runtime upgrade checks e.g. whether to disable
48/// weight warnings, whether to run multi-block migration checks.
49#[derive(Debug, Clone, clap::Parser)]
50pub struct Command {
51    /// The source of the blockchain state to use when running the `on-runtime-upgrade` command.
52    #[command(subcommand)]
53    pub state: State,
54
55    /// Select which optional checks to perform. Selects all when no value is given.
56    ///
57    /// - `none`: Perform no checks.
58    /// - `all`: Perform all checks (default when --checks is present with no value).
59    /// - `pre-and-post`: Perform pre- and post-upgrade checks (default when the arg is not
60    ///   present).
61    /// - `try-state`: Perform the try-state checks.
62    ///
63    /// Performing any checks will potentially invalidate the measured PoV/Weight.
64    // NOTE: The clap attributes make it backwards compatible with the previous `--checks` flag.
65    #[clap(long,
66		default_value = "pre-and-post",
67		default_missing_value = "all",
68		num_args = 0..=1,
69		verbatim_doc_comment
70    )]
71    pub checks: UpgradeCheckSelect,
72
73    /// Whether to disable weight warnings. Useful if a relay chain's runtime is being tested.
74    #[clap(long, default_value = "false", default_missing_value = "true")]
75    pub no_weight_warnings: bool,
76
77    /// Whether to skip enforcing that the new runtime `spec_version` is greater or equal to the
78    /// existing `spec_version`.
79    #[clap(long, default_value = "false", default_missing_value = "true")]
80    pub disable_spec_version_check: bool,
81
82    /// Whether to disable migration idempotency checks.
83    #[clap(long, default_value = "false", default_missing_value = "true")]
84    pub disable_idempotency_checks: bool,
85
86    /// When migrations are detected as not idempotent, enabling this will output a diff of the
87    /// storage before and after running the same set of migrations the second time.
88    #[clap(long, default_value = "false", default_missing_value = "true")]
89    pub print_storage_diff: bool,
90
91    /// Whether or not multi-block migrations should be executed to completion after single-block
92    /// migratons are completed.
93    #[clap(long, default_value = "false", default_missing_value = "true")]
94    pub disable_mbm_checks: bool,
95
96    /// The maximum duration that all MBMs combined are expected to take.
97    ///
98    /// This value ensures the CLI won't run indefinitely in case of a buggy MBM.
99    #[clap(long, default_value = "600")]
100    pub mbm_max_blocks: u32,
101
102    /// The chain blocktime, in milliseconds.
103    #[arg(long)]
104    pub blocktime: u64,
105}
106
107/// Convenience struct to hold all the generic args and where clauses.
108pub(crate) struct CheckOnRuntimeUpgrade<Block, HostFns> {
109    pub shared: SharedParams,
110    pub command: Command,
111    pub _phantom: std::marker::PhantomData<(Block, HostFns)>,
112}
113
114impl<Block: BlockT<Hash = H256> + DeserializeOwned, HostFns> CheckOnRuntimeUpgrade<Block, HostFns>
115where
116    Block: BlockT + serde::de::DeserializeOwned,
117    <Block::Hash as FromStr>::Err: Debug,
118    Block::Header: serde::de::DeserializeOwned,
119    NumberFor<Block>: FromStr,
120    <NumberFor<Block> as FromStr>::Err: Debug,
121    HostFns: HostFunctions,
122{
123    // Runs the `on-runtime-upgrade` command.
124    pub async fn run(&self) -> sc_cli::Result<()> {
125        let shared = &self.shared;
126        let command = &self.command;
127
128        let executor = build_executor(shared);
129        let runtime_checks = RuntimeChecks {
130            name_matches: !shared.disable_spec_name_check,
131            version_increases: !command.disable_spec_version_check,
132            try_runtime_feature_enabled: true,
133        };
134        let mut ext = command
135            .state
136            .to_ext::<Block, HostFns>(shared, &executor, None, runtime_checks)
137            .await?;
138
139        let sync_checks = if command.disable_mbm_checks {
140            command.checks
141        } else {
142            UpgradeCheckSelect::None
143        };
144
145        // Run `TryRuntime_on_runtime_upgrade` with the given checks.
146        basti_log(
147            Level::Info,
148            format!(
149                "🔬 Running TryRuntime_on_runtime_upgrade with checks: {:?}",
150                sync_checks
151            )
152            .as_str(),
153        );
154
155        // Check the Single-Block-Migrations work:
156        let mut overlayed_changes = Default::default();
157        let _ = state_machine_call_with_proof::<Block, HostFns>(
158            &ext,
159            &mut overlayed_changes,
160            &executor,
161            "TryRuntime_on_runtime_upgrade",
162            sync_checks.encode().as_ref(),
163            Default::default(), // we don't really need any extensions here.
164            shared.export_proof.clone(),
165        )?;
166
167        let idempotency_ok = self.check_idempotency(&mut ext, &overlayed_changes)?;
168        let weight_ok = self.check_weight(&ext)?;
169
170        self.check_mbms(runtime_checks).await?;
171
172        if !weight_ok || !idempotency_ok {
173            return Err("Runtime Upgrade issues detected, exiting non-zero. See logs.".into());
174        }
175
176        Ok(())
177    }
178
179    /// Check that the migrations are idempotent.
180    ///
181    /// Expects the overlayed changes from the first execution of the migrations.
182    fn check_idempotency(
183        &self,
184        ext: &mut RemoteExternalities<Block>,
185        changes: &OverlayedChanges<HashingFor<Block>>,
186    ) -> sc_cli::Result<bool> {
187        if !self.command.disable_idempotency_checks {
188            basti_log(
189                Level::Info,
190                format!(
191                    "🔬 Running TryRuntime_on_runtime_upgrade again to check idempotency: {:?}",
192                    self.command.checks
193                )
194                .as_str(),
195            );
196            let executor = build_executor(&self.shared);
197
198            let before = changes.clone();
199            let mut after = changes.clone();
200
201            // The MBM pallet refuses to interrupt ongoing MBMs, so we need to pretend that it did
202            // not run yet. We cannot just use a prefious state since the single-block-migrations
203            // would not be tested for idempotency.
204            // TODO add switch and guessing logic for the MBM pallet name.
205            let key = [twox_128(b"MultiBlockMigrations"), twox_128(b"Cursor")].concat();
206            after.clear_prefix(&key);
207
208            // Don't print all logs again.
209            // let _quiet = LogLevelGuard::only_errors();
210            match state_machine_call_with_proof::<Block, HostFns>(
211                ext,
212                &mut after,
213                &executor,
214                "TryRuntime_on_runtime_upgrade",
215                UpgradeCheckSelect::None.encode().as_ref(),
216                Default::default(),
217                self.shared.export_proof.clone(),
218            ) {
219                Ok(_) => {
220                    if self.changed(ext, before, after)? {
221                        log::error!("❌ Migrations must behave the same when executed twice. This was not the case as a storage root hash mismatch was detected. Remove migrations one-by-one and re-run until you find the culprit.");
222                        Ok(false)
223                    } else {
224                        log::info!("✅ Migrations are idempotent");
225                        Ok(true)
226                    }
227                }
228                Err(e) => {
229                    log::error!(
230                            "❌ Migrations are not idempotent, they failed during the second execution.",
231                        );
232                    log::debug!("{:?}", e);
233                    Ok(false)
234                }
235            }
236        } else {
237            log::info!("ℹ Skipping idempotency check");
238            Ok(true)
239        }
240    }
241
242    async fn check_mbms(&self, runtime_checks: RuntimeChecks) -> sc_cli::Result<()> {
243        if self.command.disable_mbm_checks {
244            log::info!("ℹ Skipping Multi-Block-Migrations");
245            return Ok(());
246        }
247
248        let checker = MbmChecker::<Block, HostFns> {
249            command: self.command.clone(),
250            shared: self.shared.clone(),
251            runtime_checks,
252            _phantom: Default::default(),
253        };
254
255        checker.check_mbms().await
256    }
257
258    /// Check that the migrations don't use more weights than a block.
259    fn check_weight(&self, ext: &RemoteExternalities<Block>) -> sc_cli::Result<bool> {
260        if self.command.no_weight_warnings {
261            log::info!("ℹ Skipping weight safety check");
262            return Ok(true);
263        }
264        basti_log(
265            Level::Info,
266            "🔬 TryRuntime_on_runtime_upgrade succeeded! Running it again for weight measurements.",
267        );
268
269        let executor = build_executor(&self.shared);
270        let _quiet = LogLevelGuard::only_errors();
271        let (proof, encoded_result) = state_machine_call_with_proof::<Block, HostFns>(
272            ext,
273            &mut Default::default(),
274            &executor,
275            "TryRuntime_on_runtime_upgrade",
276            UpgradeCheckSelect::None.encode().as_ref(),
277            Default::default(),
278            self.shared.export_proof.clone(),
279        )?;
280        let ref_time_results = encoded_result.try_into()?;
281        drop(_quiet);
282
283        let pre_root = ext.backend.root();
284        let pov_safety = analyse_pov::<HashingFor<Block>>(proof, *pre_root);
285        let ref_time_safety = analyse_ref_time(ref_time_results);
286
287        match (pov_safety, ref_time_safety) {
288            (WeightSafety::ProbablySafe, WeightSafety::ProbablySafe) => {
289                log::info!(
290                    target: LOG_TARGET,
291                    "✅ No weight safety issues detected. \
292                    Please note this does not guarantee a successful runtime upgrade. \
293                    Always test your runtime upgrade with recent state, and ensure that the weight usage \
294                    of your migrations will not drastically differ between testing and actual on-chain \
295                    execution."
296                );
297                Ok(true)
298            }
299            _ => {
300                log::error!(target: LOG_TARGET, "❌ Weight safety issues detected.");
301                Ok(false)
302            }
303        }
304    }
305
306    /// Whether any storage was changed.
307    fn changed(
308        &self,
309        ext: &RemoteExternalities<Block>,
310        mut before: OverlayedChanges<HashingFor<Block>>,
311        mut after: OverlayedChanges<HashingFor<Block>>,
312    ) -> sc_cli::Result<bool> {
313        // Events are fine to not be idempotent.
314        let key = [twox_128(b"System"), twox_128(b"Events")].concat();
315        after.clear_prefix(&key);
316        before.clear_prefix(&key);
317        let key = [twox_128(b"System"), twox_128(b"EventCount")].concat();
318        after.clear_prefix(&key);
319        before.clear_prefix(&key);
320
321        let (root_before, _) = before.storage_root(&ext.backend, ext.state_version);
322        let (root_after, _) = after.storage_root(&ext.backend, ext.state_version);
323
324        log::info!(
325            "Storage root before: 0x{}, after: 0x{}",
326            hex::encode(root_before),
327            hex::encode(root_after),
328        );
329
330        if root_before == root_after {
331            return Ok(false);
332        }
333
334        if self.command.print_storage_diff {
335            log::info!("Changed storage keys:");
336            let changes_before = collect_storage_changes_as_hex::<Block>(&before);
337            let changes_after = collect_storage_changes_as_hex::<Block>(&after);
338
339            similar_asserts::assert_eq!(changes_before, changes_after);
340            Err("Storage changes detected: migrations not idempotent".into())
341        } else {
342            log::error!("Run with --print-storage-diff to see list of changed storage keys.");
343            Ok(true)
344        }
345    }
346}
347
/// Outcome of a weight (PoV size or ref_time) analysis.
enum WeightSafety {
    /// The measured value stayed below the warning threshold.
    ProbablySafe,
    /// The measured value reached the warning threshold.
    PotentiallyUnsafe,
}
352
353/// The default maximum PoV size in MB.
354const DEFAULT_MAX_POV_SIZE: ByteSize = ByteSize::mb(5);
355
356/// The fraction of the total available ref_time or pov size after which a warning should be logged.
357const DEFAULT_WARNING_THRESHOLD: f32 = 0.8;
358
359/// Analyse the given ref_times and return if there is a potential weight safety issue.
360fn analyse_pov<H>(proof: StorageProof, pre_root: H::Out) -> WeightSafety
361where
362    H: Hasher,
363{
364    if proof.is_empty() {
365        log::info!(target: LOG_TARGET, "Empty PoV detected");
366        return WeightSafety::ProbablySafe;
367    }
368
369    let encoded_proof_size = proof.encoded_size();
370    let compact_proof = proof
371        .clone()
372        .into_compact_proof::<H>(pre_root)
373        .map_err(|e| {
374            log::error!(target: LOG_TARGET, "failed to generate compact proof: {:?}", e);
375            e
376        })
377        .unwrap_or(CompactProof {
378            encoded_nodes: Default::default(),
379        });
380
381    let compact_proof_size = compact_proof.encoded_size();
382    let compressed_compact_proof = zstd::stream::encode_all(&compact_proof.encode()[..], 0)
383        .map_err(|e| {
384            log::error!(
385                target: LOG_TARGET,
386                "failed to generate compressed proof: {:?}",
387                e
388            );
389            e
390        })
391        .expect("generating compressed proof should never fail if proof is valid");
392
393    let proof_nodes = proof.into_nodes();
394    log::debug!(
395        target: LOG_TARGET,
396        "Proof: 0x{}... / {} nodes",
397        HexDisplay::from(&proof_nodes.iter().flatten().cloned().take(10).collect::<Vec<_>>()),
398        proof_nodes.len()
399    );
400    log::debug!(target: LOG_TARGET, "Encoded proof size: {}", ByteSize(encoded_proof_size as u64));
401    log::debug!(target: LOG_TARGET, "Compact proof size: {}", ByteSize(compact_proof_size as u64),);
402    log::info!(
403        target: LOG_TARGET,
404        "PoV size (zstd-compressed compact proof): {}. For parachains, it's your responsibility \
405        to verify that a PoV of this size fits within any relaychain constraints.",
406        ByteSize(compressed_compact_proof.len() as u64),
407    );
408    if compressed_compact_proof.len() as f32
409        > DEFAULT_MAX_POV_SIZE.as_u64() as f32 * DEFAULT_WARNING_THRESHOLD
410    {
411        log::warn!(
412            target: LOG_TARGET,
413            "A PoV size of {} is significant. Most relay chains usually accept PoVs up to {}. \
414            Proceed with caution.",
415            ByteSize(compressed_compact_proof.len() as u64),
416            DEFAULT_MAX_POV_SIZE,
417        );
418        WeightSafety::PotentiallyUnsafe
419    } else {
420        WeightSafety::ProbablySafe
421    }
422}
423
424/// Analyse the given ref_times and return if there is a potential weight safety issue.
425fn analyse_ref_time(ref_time_results: RefTimeInfo) -> WeightSafety {
426    let RefTimeInfo { used, max } = ref_time_results;
427    let (used, max) = (used.as_secs_f32(), max.as_secs_f32());
428    log::info!(
429        target: LOG_TARGET,
430        "Consumed ref_time: {}s ({:.2}% of max {}s)",
431        used,
432        used / max * 100.0,
433        max,
434    );
435    if used >= max * DEFAULT_WARNING_THRESHOLD {
436        log::warn!(
437            target: LOG_TARGET,
438            "Consumed ref_time is >= {}% of the max allowed ref_time. Please ensure the \
439            migration is not be too computationally expensive to be fit in a single block.",
440            DEFAULT_WARNING_THRESHOLD * 100.0,
441        );
442        WeightSafety::PotentiallyUnsafe
443    } else {
444        WeightSafety::ProbablySafe
445    }
446}
447
448fn collect_storage_changes_as_hex<Block: BlockT>(
449    overlayed_changes: &OverlayedChanges<HashingFor<Block>>,
450) -> BTreeMap<String, String> {
451    overlayed_changes
452        .changes()
453        .map(|(key, entry)| {
454            (
455                HexDisplay::from(key).to_string(),
456                entry
457                    .clone()
458                    .value()
459                    .map_or_else(|| "<deleted>".to_string(), hex::encode),
460            )
461        })
462        .collect()
463}