try_runtime_core/commands/on_runtime_upgrade/
mod.rs

1// This file is part of try-runtime-cli.
2
3// Copyright (C) Parity Technologies (UK) Ltd.
4// SPDX-License-Identifier: Apache-2.0
5
6// Licensed under the Apache License, Version 2.0 (the "License");
7// you may not use this file except in compliance with the License.
8// You may obtain a copy of the License at
9//
10// 	http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18pub mod mbms;
19
20use std::{collections::BTreeMap, fmt::Debug, str::FromStr};
21
22use bytesize::ByteSize;
23use frame_remote_externalities::RemoteExternalities;
24use frame_try_runtime::UpgradeCheckSelect;
25use log::Level;
26use parity_scale_codec::Encode;
27use sc_executor::sp_wasm_interface::HostFunctions;
28use sp_core::{hexdisplay::HexDisplay, twox_128, Hasher, H256};
29use sp_runtime::{
30    traits::{Block as BlockT, HashingFor, NumberFor},
31    DeserializeOwned,
32};
33use sp_state_machine::{CompactProof, OverlayedChanges, StorageProof};
34
35use crate::{
36    commands::on_runtime_upgrade::mbms::MbmChecker,
37    common::{
38        misc_logging::{basti_log, LogLevelGuard},
39        state::{build_executor, state_machine_call_with_proof, RuntimeChecks, State},
40    },
41    RefTimeInfo, SharedParams, LOG_TARGET,
42};
43
44/// Configuration for [`run`].
45#[derive(Debug, Clone, clap::Parser)]
46pub struct Command {
47    /// The state type to use.
48    #[command(subcommand)]
49    pub state: State,
50
51    /// Select which optional checks to perform. Selects all when no value is given.
52    ///
53    /// - `none`: Perform no checks.
54    /// - `all`: Perform all checks (default when --checks is present with no value).
55    /// - `pre-and-post`: Perform pre- and post-upgrade checks (default when the arg is not
56    ///   present).
57    /// - `try-state`: Perform the try-state checks.
58    ///
59    /// Performing any checks will potentially invalidate the measured PoV/Weight.
60    // NOTE: The clap attributes make it backwards compatible with the previous `--checks` flag.
61    #[clap(long,
62		default_value = "pre-and-post",
63		default_missing_value = "all",
64		num_args = 0..=1,
65		verbatim_doc_comment
66    )]
67    pub checks: UpgradeCheckSelect,
68
69    /// Whether to disable weight warnings, useful if the runtime is for a relay chain.
70    #[clap(long, default_value = "false", default_missing_value = "true")]
71    pub no_weight_warnings: bool,
72
73    /// Whether to skip enforcing that the new runtime `spec_version` is greater or equal to the
74    /// existing `spec_version`.
75    #[clap(long, default_value = "false", default_missing_value = "true")]
76    pub disable_spec_version_check: bool,
77
78    /// Whether to disable migration idempotency checks
79    #[clap(long, default_value = "false", default_missing_value = "true")]
80    pub disable_idempotency_checks: bool,
81
82    /// When migrations are detected as not idempotent, enabling this will output a diff of the
83    /// storage before and after running the same set of migrations the second time.
84    #[clap(long, default_value = "false", default_missing_value = "true")]
85    pub print_storage_diff: bool,
86
87    /// Whether or multi-block migrations should be executed to completion after single block
88    /// migratons are completed.
89    #[clap(long, default_value = "false", default_missing_value = "true")]
90    pub disable_mbm_checks: bool,
91
92    /// The maximum duration we expect all MBMs combined to take.
93    ///
94    /// This value is just here to ensure that the CLI won't run forever in case of a buggy MBM.
95    #[clap(long, default_value = "600")]
96    pub mbm_max_blocks: u32,
97
98    /// The chain blocktime in milliseconds.
99    #[arg(long)]
100    pub blocktime: u64,
101}
102
/// Convenience struct to hold all the generic args and where clauses.
///
/// Bundles the parameters used by the `on-runtime-upgrade` checks so that the `Block` and
/// `HostFns` generics (and their bounds) only need to be stated once, on the `impl` block.
pub(crate) struct CheckOnRuntimeUpgrade<Block, HostFns> {
    /// Shared parameters; read when building the executor, for the spec-name check toggle and
    /// for the proof export setting.
    pub shared: SharedParams,
    /// The `on-runtime-upgrade` specific configuration.
    pub command: Command,
    /// Marker that ties the otherwise unused `Block` and `HostFns` type parameters to the struct.
    pub _phantom: std::marker::PhantomData<(Block, HostFns)>,
}
109
110impl<Block: BlockT<Hash = H256> + DeserializeOwned, HostFns> CheckOnRuntimeUpgrade<Block, HostFns>
111where
112    Block: BlockT + serde::de::DeserializeOwned,
113    <Block::Hash as FromStr>::Err: Debug,
114    Block::Header: serde::de::DeserializeOwned,
115    NumberFor<Block>: FromStr,
116    <NumberFor<Block> as FromStr>::Err: Debug,
117    HostFns: HostFunctions,
118{
119    // Runs the `on-runtime-upgrade` command.
120    pub async fn run(&self) -> sc_cli::Result<()> {
121        let shared = &self.shared;
122        let command = &self.command;
123
124        let executor = build_executor(shared);
125        let runtime_checks = RuntimeChecks {
126            name_matches: !shared.disable_spec_name_check,
127            version_increases: !command.disable_spec_version_check,
128            try_runtime_feature_enabled: true,
129        };
130        let mut ext = command
131            .state
132            .to_ext::<Block, HostFns>(shared, &executor, None, runtime_checks)
133            .await?;
134
135        let sync_checks = if command.disable_mbm_checks {
136            command.checks
137        } else {
138            UpgradeCheckSelect::None
139        };
140
141        // Run `TryRuntime_on_runtime_upgrade` with the given checks.
142        basti_log(
143            Level::Info,
144            format!(
145                "🔬 Running TryRuntime_on_runtime_upgrade with checks: {:?}",
146                sync_checks
147            )
148            .as_str(),
149        );
150
151        // Check the Single-Block-Migrations work:
152        let mut overlayed_changes = Default::default();
153        let _ = state_machine_call_with_proof::<Block, HostFns>(
154            &ext,
155            &mut overlayed_changes,
156            &executor,
157            "TryRuntime_on_runtime_upgrade",
158            sync_checks.encode().as_ref(),
159            Default::default(), // we don't really need any extensions here.
160            shared.export_proof.clone(),
161        )?;
162
163        let idempotency_ok = self.check_idempotency(&mut ext, &overlayed_changes)?;
164        let weight_ok = self.check_weight(&ext)?;
165
166        self.check_mbms(runtime_checks).await?;
167
168        if !weight_ok || !idempotency_ok {
169            return Err("Runtime Upgrade issues detected, exiting non-zero. See logs.".into());
170        }
171
172        Ok(())
173    }
174
175    /// Check that the migrations are idempotent.
176    ///
177    /// Expects the overlayed changes from the first execution of the migrations.
178    fn check_idempotency(
179        &self,
180        ext: &mut RemoteExternalities<Block>,
181        changes: &OverlayedChanges<HashingFor<Block>>,
182    ) -> sc_cli::Result<bool> {
183        if !self.command.disable_idempotency_checks {
184            basti_log(
185                Level::Info,
186                format!(
187                    "🔬 Running TryRuntime_on_runtime_upgrade again to check idempotency: {:?}",
188                    self.command.checks
189                )
190                .as_str(),
191            );
192            let executor = build_executor(&self.shared);
193
194            let before = changes.clone();
195            let mut after = changes.clone();
196
197            // The MBM pallet refuses to interrupt ongoing MBMs, so we need to pretend that it did
198            // not run yet. We cannot just use a prefious state since the single-block-migrations
199            // would not be tested for idempotency.
200            // TODO add switch and guessing logic for the MBM pallet name.
201            let key = [twox_128(b"MultiBlockMigrations"), twox_128(b"Cursor")].concat();
202            after.clear_prefix(&key);
203
204            // Don't print all logs again.
205            // let _quiet = LogLevelGuard::only_errors();
206            match state_machine_call_with_proof::<Block, HostFns>(
207                ext,
208                &mut after,
209                &executor,
210                "TryRuntime_on_runtime_upgrade",
211                UpgradeCheckSelect::None.encode().as_ref(),
212                Default::default(),
213                self.shared.export_proof.clone(),
214            ) {
215                Ok(_) => {
216                    if self.changed(ext, before, after)? {
217                        log::error!("❌ Migrations must behave the same when executed twice. This was not the case as a storage root hash mismatch was detected. Remove migrations one-by-one and re-run until you find the culprit.");
218                        Ok(false)
219                    } else {
220                        log::info!("✅ Migrations are idempotent");
221                        Ok(true)
222                    }
223                }
224                Err(e) => {
225                    log::error!(
226                            "❌ Migrations are not idempotent, they failed during the second execution.",
227                        );
228                    log::debug!("{:?}", e);
229                    Ok(false)
230                }
231            }
232        } else {
233            log::info!("ℹ Skipping idempotency check");
234            Ok(true)
235        }
236    }
237
238    async fn check_mbms(&self, runtime_checks: RuntimeChecks) -> sc_cli::Result<()> {
239        if self.command.disable_mbm_checks {
240            log::info!("ℹ Skipping Multi-Block-Migrations");
241            return Ok(());
242        }
243
244        let checker = MbmChecker::<Block, HostFns> {
245            command: self.command.clone(),
246            shared: self.shared.clone(),
247            runtime_checks,
248            _phantom: Default::default(),
249        };
250
251        checker.check_mbms().await
252    }
253
254    /// Check that the migrations don't use more weights than a block.
255    fn check_weight(&self, ext: &RemoteExternalities<Block>) -> sc_cli::Result<bool> {
256        if self.command.no_weight_warnings {
257            log::info!("ℹ Skipping weight safety check");
258            return Ok(true);
259        }
260        basti_log(
261            Level::Info,
262            "🔬 TryRuntime_on_runtime_upgrade succeeded! Running it again for weight measurements.",
263        );
264
265        let executor = build_executor(&self.shared);
266        let _quiet = LogLevelGuard::only_errors();
267        let (proof, encoded_result) = state_machine_call_with_proof::<Block, HostFns>(
268            ext,
269            &mut Default::default(),
270            &executor,
271            "TryRuntime_on_runtime_upgrade",
272            UpgradeCheckSelect::None.encode().as_ref(),
273            Default::default(),
274            self.shared.export_proof.clone(),
275        )?;
276        let ref_time_results = encoded_result.try_into()?;
277        drop(_quiet);
278
279        let pre_root = ext.backend.root();
280        let pov_safety = analyse_pov::<HashingFor<Block>>(proof, *pre_root);
281        let ref_time_safety = analyse_ref_time(ref_time_results);
282
283        match (pov_safety, ref_time_safety) {
284            (WeightSafety::ProbablySafe, WeightSafety::ProbablySafe) => {
285                log::info!(
286                    target: LOG_TARGET,
287                    "✅ No weight safety issues detected. \
288                    Please note this does not guarantee a successful runtime upgrade. \
289                    Always test your runtime upgrade with recent state, and ensure that the weight usage \
290                    of your migrations will not drastically differ between testing and actual on-chain \
291                    execution."
292                );
293                Ok(true)
294            }
295            _ => {
296                log::error!(target: LOG_TARGET, "❌ Weight safety issues detected.");
297                Ok(false)
298            }
299        }
300    }
301
302    /// Whether any storage was changed.
303    fn changed(
304        &self,
305        ext: &RemoteExternalities<Block>,
306        mut before: OverlayedChanges<HashingFor<Block>>,
307        mut after: OverlayedChanges<HashingFor<Block>>,
308    ) -> sc_cli::Result<bool> {
309        // Events are fine to not be idempotent.
310        let key = [twox_128(b"System"), twox_128(b"Events")].concat();
311        after.clear_prefix(&key);
312        before.clear_prefix(&key);
313        let key = [twox_128(b"System"), twox_128(b"EventCount")].concat();
314        after.clear_prefix(&key);
315        before.clear_prefix(&key);
316
317        let (root_before, _) = before.storage_root(&ext.backend, ext.state_version);
318        let (root_after, _) = after.storage_root(&ext.backend, ext.state_version);
319
320        log::info!(
321            "Storage root before: 0x{}, after: 0x{}",
322            hex::encode(root_before),
323            hex::encode(root_after),
324        );
325
326        if root_before == root_after {
327            return Ok(false);
328        }
329
330        if self.command.print_storage_diff {
331            log::info!("Changed storage keys:");
332            let changes_before = collect_storage_changes_as_hex::<Block>(&before);
333            let changes_after = collect_storage_changes_as_hex::<Block>(&after);
334
335            similar_asserts::assert_eq!(changes_before, changes_after);
336            Err("Storage changes detected: migrations not idempotent".into())
337        } else {
338            log::error!("Run with --print-storage-diff to see list of changed storage keys.");
339            Ok(true)
340        }
341    }
342}
343
/// Outcome of a single weight analysis (PoV size or ref_time).
enum WeightSafety {
    /// The measured value stayed below the warning threshold (or there was nothing to measure).
    ProbablySafe,
    /// The measured value reached the warning threshold; a warning has been logged.
    PotentiallyUnsafe,
}
348
/// The default maximum PoV size in MB.
///
/// Used by `analyse_pov` as the reference size the compressed proof is compared against.
const DEFAULT_MAX_POV_SIZE: ByteSize = ByteSize::mb(5);

/// The fraction of the total available ref_time or pov size after which a warning should be logged.
///
/// Reaching this fraction also makes the respective analysis report
/// `WeightSafety::PotentiallyUnsafe`.
const DEFAULT_WARNING_THRESHOLD: f32 = 0.8;
354
355/// Analyse the given ref_times and return if there is a potential weight safety issue.
356fn analyse_pov<H>(proof: StorageProof, pre_root: H::Out) -> WeightSafety
357where
358    H: Hasher,
359{
360    if proof.is_empty() {
361        log::info!(target: LOG_TARGET, "Empty PoV detected");
362        return WeightSafety::ProbablySafe;
363    }
364
365    let encoded_proof_size = proof.encoded_size();
366    let compact_proof = proof
367        .clone()
368        .into_compact_proof::<H>(pre_root)
369        .map_err(|e| {
370            log::error!(target: LOG_TARGET, "failed to generate compact proof: {:?}", e);
371            e
372        })
373        .unwrap_or(CompactProof {
374            encoded_nodes: Default::default(),
375        });
376
377    let compact_proof_size = compact_proof.encoded_size();
378    let compressed_compact_proof = zstd::stream::encode_all(&compact_proof.encode()[..], 0)
379        .map_err(|e| {
380            log::error!(
381                target: LOG_TARGET,
382                "failed to generate compressed proof: {:?}",
383                e
384            );
385            e
386        })
387        .expect("generating compressed proof should never fail if proof is valid");
388
389    let proof_nodes = proof.into_nodes();
390    log::debug!(
391        target: LOG_TARGET,
392        "Proof: 0x{}... / {} nodes",
393        HexDisplay::from(&proof_nodes.iter().flatten().cloned().take(10).collect::<Vec<_>>()),
394        proof_nodes.len()
395    );
396    log::debug!(target: LOG_TARGET, "Encoded proof size: {}", ByteSize(encoded_proof_size as u64));
397    log::debug!(target: LOG_TARGET, "Compact proof size: {}", ByteSize(compact_proof_size as u64),);
398    log::info!(
399        target: LOG_TARGET,
400        "PoV size (zstd-compressed compact proof): {}. For parachains, it's your responsibility \
401        to verify that a PoV of this size fits within any relaychain constraints.",
402        ByteSize(compressed_compact_proof.len() as u64),
403    );
404    if compressed_compact_proof.len() as f32
405        > DEFAULT_MAX_POV_SIZE.as_u64() as f32 * DEFAULT_WARNING_THRESHOLD
406    {
407        log::warn!(
408            target: LOG_TARGET,
409            "A PoV size of {} is significant. Most relay chains usually accept PoVs up to {}. \
410            Proceed with caution.",
411            ByteSize(compressed_compact_proof.len() as u64),
412            DEFAULT_MAX_POV_SIZE,
413        );
414        WeightSafety::PotentiallyUnsafe
415    } else {
416        WeightSafety::ProbablySafe
417    }
418}
419
420/// Analyse the given ref_times and return if there is a potential weight safety issue.
421fn analyse_ref_time(ref_time_results: RefTimeInfo) -> WeightSafety {
422    let RefTimeInfo { used, max } = ref_time_results;
423    let (used, max) = (used.as_secs_f32(), max.as_secs_f32());
424    log::info!(
425        target: LOG_TARGET,
426        "Consumed ref_time: {}s ({:.2}% of max {}s)",
427        used,
428        used / max * 100.0,
429        max,
430    );
431    if used >= max * DEFAULT_WARNING_THRESHOLD {
432        log::warn!(
433            target: LOG_TARGET,
434            "Consumed ref_time is >= {}% of the max allowed ref_time. Please ensure the \
435            migration is not be too computationally expensive to be fit in a single block.",
436            DEFAULT_WARNING_THRESHOLD * 100.0,
437        );
438        WeightSafety::PotentiallyUnsafe
439    } else {
440        WeightSafety::ProbablySafe
441    }
442}
443
444fn collect_storage_changes_as_hex<Block: BlockT>(
445    overlayed_changes: &OverlayedChanges<HashingFor<Block>>,
446) -> BTreeMap<String, String> {
447    overlayed_changes
448        .changes()
449        .map(|(key, entry)| {
450            (
451                HexDisplay::from(key).to_string(),
452                entry
453                    .clone()
454                    .value()
455                    .map_or_else(|| "<deleted>".to_string(), hex::encode),
456            )
457        })
458        .collect()
459}