1use std::{
23 fs::File,
24 io::{self, Write},
25 marker::PhantomData,
26 path::{Path, PathBuf},
27};
28
29use anyhow::{anyhow, Context};
30use chrono::Utc;
31use flate2::{read::GzDecoder, write::GzEncoder, Compression};
32use orchestrator::shared::types::NodeSnapshot;
33use serde::{Deserialize, Serialize};
34use sha2::{Digest, Sha256};
35use tar::Archive;
36
/// Description of a bundle file that has been written to disk.
#[derive(Clone, Debug)]
pub struct Bundle {
    /// Location of the produced bundle file (the path handed to the builder).
    pub path: PathBuf,
    /// Hex-encoded SHA-256 digest of the bundle file's bytes.
    pub sha256: String,
    /// Number of bytes read back from the bundle file while hashing it.
    pub size: u64,
}
47
48#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct SnapshotManifest {
52 pub schema_version: u32,
53 pub created_at: String,
55 pub archives: Vec<ArchiveEntry>,
57 pub user_data: serde_json::Value,
60}
61
62#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct ArchiveEntry {
67 pub file: String,
69 pub sha256: String,
71 pub size: u64,
73 pub node_name: String,
75}
76
/// Value written to [`SnapshotManifest::schema_version`] for every bundle built by this module.
pub const MANIFEST_SCHEMA_VERSION: u32 = 1;
79
/// Typestate marker for a [`BundleBuilder`] that holds no snapshots yet.
///
/// A builder in this state cannot be built; adding the first snapshot moves it to
/// [`NonEmpty`].
#[derive(Debug, Clone, Copy)]
pub struct Empty;
/// Typestate marker for a [`BundleBuilder`] that holds at least one snapshot.
///
/// Only builders in this state expose `build`.
#[derive(Debug, Clone, Copy)]
pub struct NonEmpty;
84
85pub struct BundleBuilder<S = Empty> {
100 snaps: Vec<NodeSnapshot>,
101 user_data: serde_json::Value,
102 _state: PhantomData<S>,
103}
104
105impl Default for BundleBuilder<Empty> {
106 fn default() -> Self {
107 Self::new()
108 }
109}
110
111impl BundleBuilder<Empty> {
112 pub fn new() -> Self {
113 Self {
114 snaps: Vec::new(),
115 user_data: serde_json::Value::Null,
116 _state: PhantomData,
117 }
118 }
119
120 #[allow(clippy::should_implement_trait)]
124 pub fn add(mut self, snap: NodeSnapshot) -> BundleBuilder<NonEmpty> {
125 self.snaps.push(snap);
126 BundleBuilder {
127 snaps: self.snaps,
128 user_data: self.user_data,
129 _state: PhantomData,
130 }
131 }
132}
133
134impl BundleBuilder<NonEmpty> {
135 #[allow(clippy::should_implement_trait)]
137 pub fn add(mut self, snap: NodeSnapshot) -> Self {
138 self.snaps.push(snap);
139 self
140 }
141
142 pub fn build(self, out_path: impl AsRef<Path>) -> anyhow::Result<Bundle> {
145 let out_path = out_path.as_ref().to_path_buf();
146 build_bundle(out_path, self.snaps, self.user_data)
147 }
148}
149
150impl<S> BundleBuilder<S> {
151 pub fn user_data<T: Serialize>(mut self, data: T) -> Self {
156 self.user_data = serde_json::to_value(&data).unwrap_or(serde_json::Value::Null);
157 self
158 }
159}
160
161fn build_bundle(
162 out_path: PathBuf,
163 snaps: Vec<NodeSnapshot>,
164 user_data: serde_json::Value,
165) -> anyhow::Result<Bundle> {
166 let entries: Vec<ArchiveEntry> = snaps
169 .iter()
170 .map(|snap| {
171 let file = snap
172 .path
173 .file_name()
174 .ok_or_else(|| anyhow!("snapshot path {} has no filename", snap.path.display()))?
175 .to_string_lossy()
176 .into_owned();
177 Ok::<_, anyhow::Error>(ArchiveEntry {
178 file,
179 sha256: snap.sha256.clone(),
180 size: snap.size,
181 node_name: snap.node_name.clone(),
182 })
183 })
184 .collect::<Result<_, _>>()?;
185
186 let manifest = SnapshotManifest {
187 schema_version: MANIFEST_SCHEMA_VERSION,
188 created_at: Utc::now().to_rfc3339(),
189 archives: entries,
190 user_data,
191 };
192 let manifest_bytes =
193 serde_json::to_vec_pretty(&manifest).context("serialising SnapshotManifest")?;
194
195 let f = File::create(&out_path).with_context(|| format!("creating {}", out_path.display()))?;
198 let gz = GzEncoder::new(f, Compression::default());
199 let mut tar = tar::Builder::new(gz);
200
201 for (snap, entry) in snaps.iter().zip(manifest.archives.iter()) {
202 tar.append_path_with_name(&snap.path, &entry.file)
203 .with_context(|| format!("appending {} as {}", snap.path.display(), entry.file))?;
204 }
205
206 {
207 let mut header = tar::Header::new_gnu();
208 header.set_size(manifest_bytes.len() as u64);
209 header.set_mode(0o644);
210 header.set_mtime(
211 std::time::SystemTime::now()
212 .duration_since(std::time::UNIX_EPOCH)
213 .map(|d| d.as_secs())
214 .unwrap_or(0),
215 );
216 header.set_cksum();
217 tar.append_data(&mut header, "manifest.json", manifest_bytes.as_slice())
218 .context("appending manifest.json")?;
219 }
220
221 let gz = tar.into_inner().context("finishing tar")?;
222 let mut f = gz.finish().context("finishing gzip")?;
223 f.flush().context("flushing bundle file")?;
224 drop(f);
225
226 let mut file = File::open(&out_path)
227 .with_context(|| format!("reading produced bundle {}", out_path.display()))?;
228 let mut sha256 = Sha256::new();
229 let size = io::copy(&mut file, &mut sha256).with_context(|| {
230 format!(
231 "can not copy from file {} to generate hash",
232 out_path.display()
233 )
234 })?;
235 let sha256 = hex::encode(sha256.finalize());
236
237 Ok(Bundle {
238 path: out_path,
239 sha256,
240 size,
241 })
242}
243
244pub fn untar_bundle(bundle_path: &Path, out_dir: &Path) -> anyhow::Result<()> {
246 std::fs::create_dir_all(out_dir)?;
247 let f = File::open(bundle_path)?;
248 let gz = GzDecoder::new(f);
249 let mut archive = Archive::new(gz);
250 archive.unpack(out_dir)?;
251 Ok(())
252}
253
#[cfg(test)]
mod tests {
    use std::collections::BTreeSet;

    use flate2::read::GzDecoder;
    use serde_json::json;
    use sha2::Digest;
    use tar::Archive;

    use super::*;

    /// Unique scratch directory that is deleted when the guard is dropped, so a failing
    /// assertion does not leave files behind in the system temp dir.
    struct ScratchDir(PathBuf);

    impl ScratchDir {
        /// Creates a fresh, uniquely named directory under the system temp dir.
        fn new() -> Self {
            let dir =
                std::env::temp_dir().join(format!("zombie-bundle-test-{}", uuid::Uuid::new_v4()));
            std::fs::create_dir_all(&dir).expect("create temp dir");
            Self(dir)
        }

        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best-effort cleanup; a leftover directory must not fail the test.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// Hex-encoded SHA-256 of `bytes`.
    fn sha256_of(bytes: &[u8]) -> String {
        hex::encode(sha2::Sha256::digest(bytes))
    }

    /// Writes `bytes` to `dir/name` and returns a snapshot record describing that file.
    fn fake_snapshot(dir: &Path, name: &str, node_name: &str, bytes: &[u8]) -> NodeSnapshot {
        let path = dir.join(name);
        std::fs::write(&path, bytes).expect("write dummy archive");
        NodeSnapshot {
            path,
            sha256: sha256_of(bytes),
            size: bytes.len() as u64,
            node_name: node_name.to_string(),
        }
    }

    /// Extracts `bundle` into `into` without going through `untar_bundle`.
    fn unpack(bundle: &Path, into: &Path) {
        std::fs::create_dir_all(into).expect("create extract dir");
        let f = std::fs::File::open(bundle).expect("open bundle");
        Archive::new(GzDecoder::new(f))
            .unpack(into)
            .expect("unpack bundle");
    }

    /// Names of the direct children of `dir`.
    fn file_names(dir: &Path) -> BTreeSet<String> {
        std::fs::read_dir(dir)
            .expect("read extract dir")
            .map(|e| {
                e.expect("read dir entry")
                    .file_name()
                    .to_string_lossy()
                    .into_owned()
            })
            .collect()
    }

    /// Parses `dir/manifest.json`.
    fn read_manifest(dir: &Path) -> SnapshotManifest {
        let raw = std::fs::read(dir.join("manifest.json")).expect("read manifest.json");
        serde_json::from_slice(&raw).expect("manifest deserialises")
    }

    #[test]
    fn build_produces_bundle_and_manifest() {
        let dir = ScratchDir::new();
        let relay_bytes = b"dummy-relay-db-contents".as_slice();
        let para_bytes = b"dummy-para-db-contents-longer".as_slice();
        let relay = fake_snapshot(dir.path(), "relaychain-db.tgz", "alice", relay_bytes);
        let para = fake_snapshot(dir.path(), "parachain-db.tgz", "collator-1", para_bytes);

        let bundle = BundleBuilder::new()
            .add(relay.clone())
            .add(para.clone())
            .user_data(json!({ "snapshot_height": 42 }))
            .build(dir.path().join("bundle.tar.gz"))
            .expect("bundle builds");

        // The reported digest and size must describe the file actually on disk.
        assert!(bundle.path.is_file());
        let on_disk = std::fs::read(&bundle.path).expect("read bundle");
        assert_eq!(sha256_of(&on_disk), bundle.sha256);
        assert_eq!(on_disk.len() as u64, bundle.size);

        let extracted = dir.path().join("extracted");
        unpack(&bundle.path, &extracted);
        assert_eq!(
            file_names(&extracted),
            BTreeSet::from([
                "relaychain-db.tgz".to_string(),
                "parachain-db.tgz".to_string(),
                "manifest.json".to_string(),
            ])
        );

        // Archive payloads survive the round trip byte for byte.
        assert_eq!(
            std::fs::read(extracted.join("relaychain-db.tgz")).unwrap(),
            relay_bytes
        );
        assert_eq!(
            std::fs::read(extracted.join("parachain-db.tgz")).unwrap(),
            para_bytes
        );

        let manifest = read_manifest(&extracted);
        assert_eq!(manifest.schema_version, MANIFEST_SCHEMA_VERSION);
        assert!(!manifest.created_at.is_empty());
        assert_eq!(manifest.user_data["snapshot_height"], json!(42));
        assert_eq!(manifest.archives.len(), 2);

        // Manifest entries mirror the snapshots in insertion order.
        for (entry, snap) in manifest.archives.iter().zip([&relay, &para]) {
            assert_eq!(entry.file, snap.path.file_name().unwrap().to_string_lossy());
            assert_eq!(entry.sha256, snap.sha256);
            assert_eq!(entry.size, snap.size);
            assert_eq!(entry.node_name, snap.node_name);
        }
    }

    #[test]
    fn single_archive_default_user_data() {
        let dir = ScratchDir::new();
        let snap = fake_snapshot(dir.path(), "relaychain-db.tgz", "alice", b"x");

        let bundle = BundleBuilder::new()
            .add(snap)
            .build(dir.path().join("bundle.tar.gz"))
            .expect("bundle builds");

        let extracted = dir.path().join("extracted");
        unpack(&bundle.path, &extracted);
        assert_eq!(
            file_names(&extracted),
            BTreeSet::from(["relaychain-db.tgz".to_string(), "manifest.json".to_string()])
        );

        let manifest = read_manifest(&extracted);
        assert_eq!(manifest.archives.len(), 1);
        assert_eq!(manifest.user_data, serde_json::Value::Null);
    }

    #[test]
    fn untar_bundle_restores_archives() {
        let dir = ScratchDir::new();
        let snap = fake_snapshot(dir.path(), "relaychain-db.tgz", "alice", b"payload");

        let bundle = BundleBuilder::new()
            .add(snap)
            .build(dir.path().join("bundle.tar.gz"))
            .expect("bundle builds");

        // The target does not exist yet: `untar_bundle` has to create it.
        let extracted = dir.path().join("nested").join("extracted");
        untar_bundle(&bundle.path, &extracted).expect("bundle unpacks");

        assert_eq!(
            std::fs::read(extracted.join("relaychain-db.tgz")).unwrap(),
            b"payload".as_slice()
        );
        assert!(extracted.join("manifest.json").is_file());
    }
}