Skip to main content

zombienet_sdk/
snapshot.rs

1//! Bundle per-node DB snapshots into a single uploadable artifact.
2//!
3//! The per-node tarballs are produced by [`NetworkNode::snapshot_db`]
4//! (in the orchestrator crate). This module packs them — plus a JSON
5//! `user_data` blob — into a single outer `bundle.tar.gz` with a
6//! [`SnapshotManifest`] sidecar so consumers can verify checksums and
7//! discover what's inside.
8//!
9//! Layout inside the bundle:
10//! ```text
11//! <archive1>.tgz
12//! <archive2>.tgz
13//! ...
14//! manifest.json   // schema: SnapshotManifest
15//! ```
16//!
17//! Consumer side is user-owned: the user untars the bundle and passes the
18//! inner `.tgz` paths to `with_db_snapshot(...)` on a `NodeConfigBuilder`.
19//!
20//! [`NetworkNode::snapshot_db`]: orchestrator::network::node::NetworkNode::snapshot_db
21
22use std::{
23    fs::File,
24    io::{self, Write},
25    marker::PhantomData,
26    path::{Path, PathBuf},
27};
28
29use anyhow::{anyhow, Context};
30use chrono::Utc;
31use flate2::{read::GzDecoder, write::GzEncoder, Compression};
32use orchestrator::shared::types::NodeSnapshot;
33use serde::{Deserialize, Serialize};
34use sha2::{Digest, Sha256};
35use tar::Archive;
36
/// Result of [`BundleBuilder::build`]: where the bundle was written, plus
/// the checksum and size of the file that ended up on disk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Bundle {
    /// Path to the produced `bundle.tar.gz`, exactly as it was passed to
    /// [`BundleBuilder::build`]. It is not canonicalised, so it is only
    /// absolute if the caller supplied an absolute path.
    pub path: PathBuf,
    /// Hex-encoded SHA-256 of the outer bundle file's bytes.
    pub sha256: String,
    /// Size of the outer bundle file in bytes.
    pub size: u64,
}
47
/// Schema of `manifest.json` inside the bundle. Versioned —
/// [`MANIFEST_SCHEMA_VERSION`] bumps are breaking changes.
///
/// The manifest is serialised as pretty-printed JSON and stored as a
/// top-level entry of the bundle, next to the per-node archives it describes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SnapshotManifest {
    /// Schema version the manifest was written with; set to
    /// [`MANIFEST_SCHEMA_VERSION`] at bundle-build time.
    pub schema_version: u32,
    /// RFC 3339 timestamp (UTC) taken at bundle-build time.
    pub created_at: String,
    /// One [`ArchiveEntry`] per bundled archive, in the order the archives
    /// were added to the builder.
    pub archives: Vec<ArchiveEntry>,
    /// Caller-provided payload from [`BundleBuilder::user_data`]. Free-form;
    /// shape is the test author's responsibility. JSON `null` when the
    /// caller never supplied any.
    pub user_data: serde_json::Value,
}
61
/// Per-archive metadata inside a [`SnapshotManifest`].
/// This is similar to [`NodeSnapshot`] but using the _filename_
/// instead of the path.
///
/// `sha256`, `size` and `node_name` are copied verbatim from the
/// [`NodeSnapshot`] the archive came from; they are not recomputed when the
/// bundle is built.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchiveEntry {
    /// Basename inside the bundle (e.g. `"relaychain-db.tgz"`). Derived from
    /// the final component of the snapshot's path; non-UTF-8 names are
    /// converted lossily.
    pub file: String,
    /// Hex-encoded SHA-256 of the archive's bytes, as recorded in the
    /// originating [`NodeSnapshot`].
    pub sha256: String,
    /// Size of the archive in bytes, as recorded in the originating
    /// [`NodeSnapshot`].
    pub size: u64,
    /// Name of the node that produced this archive.
    pub node_name: String,
}
76
/// Stable schema version of [`SnapshotManifest`]. Bump on breaking changes.
///
/// Written into [`SnapshotManifest::schema_version`] for every bundle that
/// is built, so consumers can check it before interpreting the manifest.
pub const MANIFEST_SCHEMA_VERSION: u32 = 1;
79
/// Typestate marker: no archives added yet.
///
/// Zero-sized; only ever used as a type parameter, never stored as a value.
#[derive(Debug, Clone, Copy)]
pub struct Empty;
/// Typestate marker: at least one archive has been added.
///
/// Zero-sized; only ever used as a type parameter, never stored as a value.
#[derive(Debug, Clone, Copy)]
pub struct NonEmpty;
84
/// Assembles a single `bundle.tar.gz` from one or more [`NodeSnapshot`]s
/// plus a JSON `user_data` blob.
///
/// The typestate makes [`build`](BundleBuilder::build) unreachable until
/// at least one archive has been added.
///
/// # Example
/// ```ignore
/// let bundle = BundleBuilder::new()
///     .add(relay_snap)
///     .add(para_snap)
///     .user_data(json!({ "snapshot_height": 930 }))
///     .build("bundle.tar.gz")?;
/// ```
pub struct BundleBuilder<S = Empty> {
    /// Per-node archives to pack, in the order they were added.
    snaps: Vec<NodeSnapshot>,
    /// Payload stored under `user_data` in the manifest; JSON `null` until
    /// the caller sets it.
    user_data: serde_json::Value,
    /// Zero-sized typestate tag ([`Empty`] or [`NonEmpty`]); carries no data.
    _state: PhantomData<S>,
}
104
105impl Default for BundleBuilder<Empty> {
106    fn default() -> Self {
107        Self::new()
108    }
109}
110
111impl BundleBuilder<Empty> {
112    pub fn new() -> Self {
113        Self {
114            snaps: Vec::new(),
115            user_data: serde_json::Value::Null,
116            _state: PhantomData,
117        }
118    }
119
120    /// Add the first per-node archive. Transitions the builder to
121    /// [`NonEmpty`], which is the only state that exposes
122    /// [`build`](BundleBuilder::build).
123    #[allow(clippy::should_implement_trait)]
124    pub fn add(mut self, snap: NodeSnapshot) -> BundleBuilder<NonEmpty> {
125        self.snaps.push(snap);
126        BundleBuilder {
127            snaps: self.snaps,
128            user_data: self.user_data,
129            _state: PhantomData,
130        }
131    }
132}
133
134impl BundleBuilder<NonEmpty> {
135    /// Add a subsequent per-node archive.
136    #[allow(clippy::should_implement_trait)]
137    pub fn add(mut self, snap: NodeSnapshot) -> Self {
138        self.snaps.push(snap);
139        self
140    }
141
142    /// Produce `out_path` (gzipped outer tarball). Only callable once at
143    /// least one archive has been added — enforced at compile time.
144    pub fn build(self, out_path: impl AsRef<Path>) -> anyhow::Result<Bundle> {
145        let out_path = out_path.as_ref().to_path_buf();
146        build_bundle(out_path, self.snaps, self.user_data)
147    }
148}
149
150impl<S> BundleBuilder<S> {
151    /// Attach an arbitrary serializable blob. Stored as JSON under
152    /// `user_data` in the manifest. Test authors put block heights,
153    /// CIDs, release tags, "number of collators", etc. here. Can be
154    /// called before or after [`add`](BundleBuilder::add); last call wins.
155    pub fn user_data<T: Serialize>(mut self, data: T) -> Self {
156        self.user_data = serde_json::to_value(&data).unwrap_or(serde_json::Value::Null);
157        self
158    }
159}
160
161fn build_bundle(
162    out_path: PathBuf,
163    snaps: Vec<NodeSnapshot>,
164    user_data: serde_json::Value,
165) -> anyhow::Result<Bundle> {
166    // Build the manifest from the per-archive metadata the orchestrator
167    // already computed when each .tgz was produced.
168    let entries: Vec<ArchiveEntry> = snaps
169        .iter()
170        .map(|snap| {
171            let file = snap
172                .path
173                .file_name()
174                .ok_or_else(|| anyhow!("snapshot path {} has no filename", snap.path.display()))?
175                .to_string_lossy()
176                .into_owned();
177            Ok::<_, anyhow::Error>(ArchiveEntry {
178                file,
179                sha256: snap.sha256.clone(),
180                size: snap.size,
181                node_name: snap.node_name.clone(),
182            })
183        })
184        .collect::<Result<_, _>>()?;
185
186    let manifest = SnapshotManifest {
187        schema_version: MANIFEST_SCHEMA_VERSION,
188        created_at: Utc::now().to_rfc3339(),
189        archives: entries,
190        user_data,
191    };
192    let manifest_bytes =
193        serde_json::to_vec_pretty(&manifest).context("serialising SnapshotManifest")?;
194
195    // Tar everything in: per-node .tgzs (read from disk) + manifest.json
196    // (in memory). Top-level entries are flat — no subdirectory.
197    let f = File::create(&out_path).with_context(|| format!("creating {}", out_path.display()))?;
198    let gz = GzEncoder::new(f, Compression::default());
199    let mut tar = tar::Builder::new(gz);
200
201    for (snap, entry) in snaps.iter().zip(manifest.archives.iter()) {
202        tar.append_path_with_name(&snap.path, &entry.file)
203            .with_context(|| format!("appending {} as {}", snap.path.display(), entry.file))?;
204    }
205
206    {
207        let mut header = tar::Header::new_gnu();
208        header.set_size(manifest_bytes.len() as u64);
209        header.set_mode(0o644);
210        header.set_mtime(
211            std::time::SystemTime::now()
212                .duration_since(std::time::UNIX_EPOCH)
213                .map(|d| d.as_secs())
214                .unwrap_or(0),
215        );
216        header.set_cksum();
217        tar.append_data(&mut header, "manifest.json", manifest_bytes.as_slice())
218            .context("appending manifest.json")?;
219    }
220
221    let gz = tar.into_inner().context("finishing tar")?;
222    let mut f = gz.finish().context("finishing gzip")?;
223    f.flush().context("flushing bundle file")?;
224    drop(f);
225
226    let mut file = File::open(&out_path)
227        .with_context(|| format!("reading produced bundle {}", out_path.display()))?;
228    let mut sha256 = Sha256::new();
229    let size = io::copy(&mut file, &mut sha256).with_context(|| {
230        format!(
231            "can not copy from file {} to generate hash",
232            out_path.display()
233        )
234    })?;
235    let sha256 = hex::encode(sha256.finalize());
236
237    Ok(Bundle {
238        path: out_path,
239        sha256,
240        size,
241    })
242}
243
244/// Helper function to untar the produced bundle into a destiantion path.
245pub fn untar_bundle(bundle_path: &Path, out_dir: &Path) -> anyhow::Result<()> {
246    std::fs::create_dir_all(out_dir)?;
247    let f = File::open(bundle_path)?;
248    let gz = GzDecoder::new(f);
249    let mut archive = Archive::new(gz);
250    archive.unpack(out_dir)?;
251    Ok(())
252}
253
#[cfg(test)]
mod tests {
    use std::collections::BTreeSet;

    use serde_json::json;
    use sha2::Digest;

    use super::*;

    /// Hex-encoded SHA-256 of `bytes`.
    fn sha256_of(bytes: &[u8]) -> String {
        hex::encode(sha2::Sha256::digest(bytes))
    }

    /// Write `bytes` to `dir/name` and return a `NodeSnapshot` describing it,
    /// mirroring what `NetworkNode::snapshot_db` records on disk.
    fn fake_snapshot(dir: &Path, name: &str, node_name: &str, bytes: &[u8]) -> NodeSnapshot {
        let path = dir.join(name);
        std::fs::write(&path, bytes).expect("write dummy archive");
        NodeSnapshot {
            path,
            sha256: sha256_of(bytes),
            size: bytes.len() as u64,
            node_name: node_name.to_string(),
        }
    }

    /// Create a uniquely named scratch directory under the system temp dir.
    fn temp_dir() -> PathBuf {
        let dir = std::env::temp_dir().join(format!("zombie-bundle-test-{}", uuid::Uuid::new_v4()));
        std::fs::create_dir_all(&dir).expect("create temp dir");
        dir
    }

    /// Unpack `bundle` into `into` through the public `untar_bundle` helper,
    /// so the helper itself is exercised by every test.
    fn unpack(bundle: &Path, into: &Path) {
        untar_bundle(bundle, into).expect("unpack bundle");
    }

    /// File names found directly inside `dir`.
    fn extracted_names(dir: &Path) -> BTreeSet<String> {
        std::fs::read_dir(dir)
            .expect("read extract dir")
            .map(|e| {
                e.expect("read dir entry")
                    .file_name()
                    .to_string_lossy()
                    .into_owned()
            })
            .collect()
    }

    /// Parse `dir/manifest.json` into a `SnapshotManifest`.
    fn read_manifest(dir: &Path) -> SnapshotManifest {
        let raw = std::fs::read(dir.join("manifest.json")).expect("read manifest.json");
        serde_json::from_slice(&raw).expect("manifest deserialises")
    }

    // NOTE: the typestate guarantee (`build` is only callable after at least
    // one `add`) is enforced at compile time — `BundleBuilder::new().build()`
    // does not compile — so it isn't exercised here.

    #[test]
    fn build_produces_bundle_and_manifest() {
        let dir = temp_dir();
        let relay_bytes = b"dummy-relay-db-contents".as_slice();
        let para_bytes = b"dummy-para-db-contents-longer".as_slice();
        let relay = fake_snapshot(&dir, "relaychain-db.tgz", "alice", relay_bytes);
        let para = fake_snapshot(&dir, "parachain-db.tgz", "collator-1", para_bytes);

        let bundle = BundleBuilder::new()
            .add(relay.clone())
            .add(para.clone())
            .user_data(json!({ "snapshot_height": 42 }))
            .build(dir.join("bundle.tar.gz"))
            .expect("bundle builds");

        // Bundle metadata matches the file on disk.
        assert!(bundle.path.is_file());
        let on_disk = std::fs::read(&bundle.path).expect("read bundle");
        assert_eq!(sha256_of(&on_disk), bundle.sha256);
        assert_eq!(on_disk.len() as u64, bundle.size);

        // Bundle contains exactly the two archives + manifest.json.
        let extracted = dir.join("extracted");
        unpack(&bundle.path, &extracted);
        assert_eq!(
            extracted_names(&extracted),
            BTreeSet::from([
                "relaychain-db.tgz".to_string(),
                "parachain-db.tgz".to_string(),
                "manifest.json".to_string(),
            ])
        );

        // Inner archive bytes round-trip unchanged.
        assert_eq!(
            std::fs::read(extracted.join("relaychain-db.tgz")).unwrap(),
            relay_bytes
        );
        assert_eq!(
            std::fs::read(extracted.join("parachain-db.tgz")).unwrap(),
            para_bytes
        );

        // Manifest content.
        let manifest = read_manifest(&extracted);
        assert_eq!(manifest.schema_version, MANIFEST_SCHEMA_VERSION);
        assert!(!manifest.created_at.is_empty());
        assert_eq!(manifest.user_data["snapshot_height"], json!(42));
        assert_eq!(manifest.archives.len(), 2);

        // Entries keep insertion order and mirror the snapshot metadata.
        for (entry, snap) in manifest.archives.iter().zip([&relay, &para]) {
            assert_eq!(entry.file, snap.path.file_name().unwrap().to_string_lossy());
            assert_eq!(entry.sha256, snap.sha256);
            assert_eq!(entry.size, snap.size);
            assert_eq!(entry.node_name, snap.node_name);
        }

        // Best-effort cleanup; skipped if an assertion above panicked.
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn single_archive_default_user_data() {
        let dir = temp_dir();
        let snap = fake_snapshot(&dir, "relaychain-db.tgz", "alice", b"x");

        let bundle = BundleBuilder::new()
            .add(snap)
            .build(dir.join("bundle.tar.gz"))
            .expect("bundle builds");

        let extracted = dir.join("extracted");
        unpack(&bundle.path, &extracted);
        assert_eq!(
            extracted_names(&extracted),
            BTreeSet::from(["relaychain-db.tgz".to_string(), "manifest.json".to_string()])
        );

        // Without a `user_data` call the manifest carries JSON `null`.
        let manifest = read_manifest(&extracted);
        assert_eq!(manifest.archives.len(), 1);
        assert_eq!(manifest.user_data, serde_json::Value::Null);

        // Best-effort cleanup; skipped if an assertion above panicked.
        let _ = std::fs::remove_dir_all(&dir);
    }
}
395}