Skip to main content

foundry_evm/executors/invariant/
corpus.rs

1use crate::executors::{
2    Executor,
3    invariant::{InvariantTest, InvariantTestRun},
4};
5use alloy_dyn_abi::JsonAbiExt;
6use alloy_primitives::U256;
7use eyre::eyre;
8use foundry_config::InvariantConfig;
9use foundry_evm_fuzz::{
10    invariant::{BasicTxDetails, FuzzRunIdentifiedContracts},
11    strategies::fuzz_param_from_state,
12};
13use proptest::{
14    prelude::{Just, Rng, Strategy},
15    prop_oneof,
16    strategy::{BoxedStrategy, ValueTree},
17    test_runner::TestRunner,
18};
19use serde::Serialize;
20use std::{
21    fmt,
22    path::PathBuf,
23    time::{SystemTime, UNIX_EPOCH},
24};
25use uuid::Uuid;
26
27const METADATA_SUFFIX: &str = "metadata.json";
28const JSON_EXTENSION: &str = ".json";
29const FAVORABILITY_THRESHOLD: f64 = 0.3;
30
31/// Possible mutation strategies to apply on a call sequence.
32#[derive(Debug, Clone)]
33enum MutationType {
34    /// Splice original call sequence.
35    Splice,
36    /// Repeat selected call several times.
37    Repeat,
38    /// Interleave calls from two random call sequences.
39    Interleave,
40    /// Replace prefix of the original call sequence with new calls.
41    Prefix,
42    /// Replace suffix of the original call sequence with new calls.
43    Suffix,
44    /// ABI mutate random args of selected call in sequence.
45    Abi,
46}
47
48/// Holds Corpus information.
49#[derive(Serialize)]
50struct CorpusEntry {
51    // Unique corpus identifier.
52    uuid: Uuid,
53    // Total mutations of corpus as primary source.
54    total_mutations: usize,
55    // New coverage found as a result of mutating this corpus.
56    new_finds_produced: usize,
57    // Corpus call sequence.
58    #[serde(skip_serializing)]
59    tx_seq: Vec<BasicTxDetails>,
60    // Whether this corpus is favored, i.e. producing new finds more often than
61    // `FAVORABILITY_THRESHOLD`.
62    is_favored: bool,
63}
64
65impl CorpusEntry {
66    /// New corpus from given call sequence and corpus path to read uuid.
67    pub fn new(tx_seq: Vec<BasicTxDetails>, path: PathBuf) -> eyre::Result<Self> {
68        let uuid = if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
69            Uuid::try_from(stem.strip_suffix(JSON_EXTENSION).unwrap_or(stem).to_string())?
70        } else {
71            Uuid::new_v4()
72        };
73        Ok(Self { uuid, total_mutations: 0, new_finds_produced: 0, tx_seq, is_favored: false })
74    }
75
76    /// New corpus with given call sequence and new uuid.
77    pub fn from_tx_seq(tx_seq: Vec<BasicTxDetails>) -> Self {
78        Self {
79            uuid: Uuid::new_v4(),
80            total_mutations: 0,
81            new_finds_produced: 0,
82            tx_seq,
83            is_favored: false,
84        }
85    }
86}
87
88#[derive(Serialize, Default)]
89pub(crate) struct CorpusMetrics {
90    // Number of edges seen during the invariant run.
91    cumulative_edges_seen: usize,
92    // Number of features (new hitcount bin of previously hit edge) seen during the invariant run.
93    cumulative_features_seen: usize,
94    // Number of corpus entries.
95    corpus_count: usize,
96    // Number of corpus entries that are favored.
97    favored_items: usize,
98}
99
100impl fmt::Display for CorpusMetrics {
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        writeln!(f)?;
103        writeln!(f, "        - cumulative edges seen: {}", self.cumulative_edges_seen)?;
104        writeln!(f, "        - cumulative features seen: {}", self.cumulative_features_seen)?;
105        writeln!(f, "        - corpus count: {}", self.corpus_count)?;
106        write!(f, "        - favored items: {}", self.favored_items)?;
107        Ok(())
108    }
109}
110
111impl CorpusMetrics {
112    /// Records number of new edges or features explored during the campaign.
113    pub fn update_seen(&mut self, is_edge: bool) {
114        if is_edge {
115            self.cumulative_edges_seen += 1;
116        } else {
117            self.cumulative_features_seen += 1;
118        }
119    }
120
121    /// Updates campaign favored items.
122    pub fn update_favored(&mut self, is_favored: bool, corpus_favored: bool) {
123        if is_favored && !corpus_favored {
124            self.favored_items += 1;
125        } else if !is_favored && corpus_favored {
126            self.favored_items -= 1;
127        }
128    }
129}
130
131/// Invariant corpus manager.
132pub struct TxCorpusManager {
133    // Fuzzed calls generator.
134    tx_generator: BoxedStrategy<BasicTxDetails>,
135    // Call sequence mutation strategy type generator.
136    mutation_generator: BoxedStrategy<MutationType>,
137    // Path to invariant corpus directory. If None, sequences with new coverage are not persisted.
138    corpus_dir: Option<PathBuf>, // TODO consolidate into config
139    // Whether corpus to use gzip file compression and decompression.
140    corpus_gzip: bool,
141    // Number of mutations until entry marked as eligible to be flushed from in-memory corpus.
142    // Mutations will be performed at least `corpus_min_mutations` times.
143    corpus_min_mutations: usize,
144    // Number of corpus that won't be evicted from memory.
145    corpus_min_size: usize,
146    // In-memory corpus, populated from persisted files and current runs.
147    // Mutation is performed on these.
148    in_memory_corpus: Vec<CorpusEntry>,
149    // Identifier of current mutated entry.
150    current_mutated: Option<Uuid>,
151    // Number of failed replays from persisted corpus.
152    failed_replays: usize,
153    // Corpus metrics.
154    pub(crate) metrics: CorpusMetrics,
155    // Maximum value for fuzzed integers, used to simulate smaller integer types.
156    // Unsigned: [0, max], Signed: [-(max+1), max].
157    max_fuzz_int: Option<U256>,
158}
159
160impl TxCorpusManager {
161    pub fn new(
162        invariant_config: &InvariantConfig,
163        test_name: &String,
164        fuzzed_contracts: &FuzzRunIdentifiedContracts,
165        tx_generator: BoxedStrategy<BasicTxDetails>,
166        executor: &Executor,
167        history_map: &mut [u8],
168    ) -> eyre::Result<Self> {
169        let mutation_generator = prop_oneof![
170            Just(MutationType::Splice),
171            Just(MutationType::Repeat),
172            Just(MutationType::Interleave),
173            Just(MutationType::Prefix),
174            Just(MutationType::Suffix),
175            Just(MutationType::Abi),
176        ]
177        .boxed();
178        let mut in_memory_corpus = vec![];
179        let corpus_gzip = invariant_config.corpus_gzip;
180        let corpus_min_mutations = invariant_config.corpus_min_mutations;
181        let corpus_min_size = invariant_config.corpus_min_size;
182        let max_fuzz_int = invariant_config.max_fuzz_int;
183        let mut failed_replays = 0;
184
185        // Early return if corpus dir / coverage guided fuzzing not configured.
186        let Some(corpus_dir) = &invariant_config.corpus_dir else {
187            return Ok(Self {
188                tx_generator,
189                mutation_generator,
190                corpus_dir: None,
191                corpus_gzip,
192                corpus_min_mutations,
193                corpus_min_size,
194                in_memory_corpus,
195                current_mutated: None,
196                failed_replays,
197                metrics: CorpusMetrics::default(),
198                max_fuzz_int,
199            });
200        };
201
202        // Ensure corpus dir for invariant function is created.
203        let corpus_dir = corpus_dir.join(test_name);
204        if !corpus_dir.is_dir() {
205            foundry_common::fs::create_dir_all(&corpus_dir)?;
206        }
207
208        let fuzzed_contracts = fuzzed_contracts.targets.lock();
209        let mut metrics = CorpusMetrics::default();
210
211        for entry in std::fs::read_dir(&corpus_dir)? {
212            let path = entry?.path();
213            if path.is_file()
214                && let Some(name) = path.file_name().and_then(|s| s.to_str())
215            {
216                // Ignore metadata files
217                if name.contains(METADATA_SUFFIX) {
218                    continue;
219                }
220            }
221            metrics.corpus_count += 1;
222
223            let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) {
224                Some("gz") => foundry_common::fs::read_json_gzip_file::<Vec<BasicTxDetails>>(&path),
225                _ => foundry_common::fs::read_json_file::<Vec<BasicTxDetails>>(&path),
226            };
227
228            let Ok(tx_seq) = read_corpus_result else {
229                trace!(target: "corpus", "failed to load corpus from {}", path.display());
230                continue;
231            };
232
233            if !tx_seq.is_empty() {
234                // Warm up history map from loaded sequences.
235                let mut executor = executor.clone();
236                for tx in &tx_seq {
237                    let mut call_result = executor
238                        .call_raw(
239                            tx.sender,
240                            tx.call_details.target,
241                            tx.call_details.calldata.clone(),
242                            U256::ZERO,
243                        )
244                        .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?;
245
246                    if fuzzed_contracts.can_replay(tx) {
247                        let (new_coverage, is_edge) = call_result.merge_edge_coverage(history_map);
248                        if new_coverage {
249                            metrics.update_seen(is_edge);
250                        }
251
252                        executor.commit(&mut call_result);
253                    } else {
254                        failed_replays += 1;
255                    }
256                }
257
258                trace!(
259                    target: "corpus",
260                    "load sequence with len {} from corpus file {}",
261                    tx_seq.len(),
262                    path.display()
263                );
264
265                // Populate in memory corpus with sequence from corpus file.
266                in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?);
267            }
268        }
269
270        Ok(Self {
271            tx_generator,
272            mutation_generator,
273            corpus_dir: Some(corpus_dir),
274            corpus_gzip,
275            corpus_min_mutations,
276            corpus_min_size,
277            in_memory_corpus,
278            current_mutated: None,
279            failed_replays,
280            metrics,
281            max_fuzz_int,
282        })
283    }
284
285    /// Collects inputs from given invariant run, if new coverage produced.
286    /// Persists call sequence (if corpus directory is configured) and updates in-memory corpus.
287    pub fn collect_inputs(&mut self, test_run: &InvariantTestRun) {
288        // Early return if corpus dir / coverage guided fuzzing is not configured.
289        let Some(corpus_dir) = &self.corpus_dir else {
290            return;
291        };
292
293        // Update stats of current mutated primary corpus.
294        if let Some(uuid) = &self.current_mutated {
295            if let Some(corpus) =
296                self.in_memory_corpus.iter_mut().find(|corpus| corpus.uuid.eq(uuid))
297            {
298                corpus.total_mutations += 1;
299                if test_run.new_coverage {
300                    corpus.new_finds_produced += 1
301                }
302                let is_favored = (corpus.new_finds_produced as f64 / corpus.total_mutations as f64)
303                    < FAVORABILITY_THRESHOLD;
304                self.metrics.update_favored(is_favored, corpus.is_favored);
305                corpus.is_favored = is_favored;
306
307                trace!(
308                    target: "corpus",
309                    "updated corpus {}, total mutations: {}, new finds: {}",
310                    corpus.uuid, corpus.total_mutations, corpus.new_finds_produced
311                );
312            }
313
314            self.current_mutated = None;
315        }
316
317        // Collect inputs only if current run produced new coverage.
318        if !test_run.new_coverage {
319            return;
320        }
321
322        let corpus = CorpusEntry::from_tx_seq(test_run.inputs.clone());
323        let corpus_uuid = corpus.uuid;
324
325        // Persist to disk if corpus dir is configured.
326        let write_result = if self.corpus_gzip {
327            foundry_common::fs::write_json_gzip_file(
328                corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}.gz")).as_path(),
329                &corpus.tx_seq,
330            )
331        } else {
332            foundry_common::fs::write_json_file(
333                corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}")).as_path(),
334                &corpus.tx_seq,
335            )
336        };
337
338        if let Err(err) = write_result {
339            debug!(target: "corpus", %err, "Failed to record call sequence {:?}", &corpus.tx_seq);
340        } else {
341            trace!(
342                target: "corpus",
343                "persisted {} inputs for new coverage in {corpus_uuid} corpus",
344                &corpus.tx_seq.len()
345            );
346        }
347
348        // This includes reverting txs in the corpus and `can_continue` removes
349        // them. We want this as it is new coverage and may help reach the other branch.
350        self.metrics.corpus_count += 1;
351        self.in_memory_corpus.push(corpus);
352    }
353
354    /// Generates new call sequence from in memory corpus. Evicts oldest corpus mutated more than
355    /// configured max mutations value.
356    pub fn new_sequence(&mut self, test: &InvariantTest) -> eyre::Result<Vec<BasicTxDetails>> {
357        let mut new_seq = vec![];
358        let test_runner = &mut test.execution_data.borrow_mut().branch_runner;
359
360        // Early return with first_input only if corpus dir / coverage guided fuzzing not
361        // configured.
362        let Some(corpus_dir) = &self.corpus_dir else {
363            new_seq.push(self.new_tx(test_runner)?);
364            return Ok(new_seq);
365        };
366
367        if !self.in_memory_corpus.is_empty() {
368            // Flush oldest corpus mutated more than configured max mutations unless they are
369            // favored.
370            let should_evict = self.in_memory_corpus.len() > self.corpus_min_size.max(1);
371            if should_evict
372                && let Some(index) = self.in_memory_corpus.iter().position(|corpus| {
373                    corpus.total_mutations > self.corpus_min_mutations && !corpus.is_favored
374                })
375            {
376                let corpus = self.in_memory_corpus.get(index).unwrap();
377
378                let uuid = corpus.uuid;
379                debug!(target: "corpus", "evict corpus {uuid}");
380
381                // Flush to disk the seed metadata at the time of eviction.
382                let eviction_time = SystemTime::now()
383                    .duration_since(UNIX_EPOCH)
384                    .expect("Time went backwards")
385                    .as_secs();
386                foundry_common::fs::write_json_file(
387                    corpus_dir.join(format!("{uuid}-{eviction_time}-{METADATA_SUFFIX}")).as_path(),
388                    &corpus,
389                )?;
390
391                // Remove corpus from memory.
392                self.in_memory_corpus.remove(index);
393            }
394
395            let mutation_type = self
396                .mutation_generator
397                .new_tree(test_runner)
398                .expect("Could not generate mutation type")
399                .current();
400            let rng = test_runner.rng();
401            let corpus_len = self.in_memory_corpus.len();
402            let primary = &self.in_memory_corpus[rng.random_range(0..corpus_len)];
403            let secondary = &self.in_memory_corpus[rng.random_range(0..corpus_len)];
404
405            match mutation_type {
406                MutationType::Splice => {
407                    trace!(target: "corpus", "splice {} and {}", primary.uuid, secondary.uuid);
408
409                    self.current_mutated = Some(primary.uuid);
410
411                    let start1 = rng.random_range(0..primary.tx_seq.len());
412                    let end1 = rng.random_range(start1..primary.tx_seq.len());
413
414                    let start2 = rng.random_range(0..secondary.tx_seq.len());
415                    let end2 = rng.random_range(start2..secondary.tx_seq.len());
416
417                    for tx in primary.tx_seq.iter().take(end1).skip(start1) {
418                        new_seq.push(tx.clone());
419                    }
420                    for tx in secondary.tx_seq.iter().take(end2).skip(start2) {
421                        new_seq.push(tx.clone());
422                    }
423                }
424                MutationType::Repeat => {
425                    let corpus = if rng.random::<bool>() { primary } else { secondary };
426                    trace!(target: "corpus", "repeat {}", corpus.uuid);
427
428                    self.current_mutated = Some(corpus.uuid);
429
430                    new_seq = corpus.tx_seq.clone();
431                    let start = rng.random_range(0..corpus.tx_seq.len());
432                    let end = rng.random_range(start..corpus.tx_seq.len());
433                    let item_idx = rng.random_range(0..corpus.tx_seq.len());
434                    let repeated = vec![new_seq[item_idx].clone(); end - start];
435                    new_seq.splice(start..end, repeated);
436                }
437                MutationType::Interleave => {
438                    trace!(target: "corpus", "interleave {} with {}", primary.uuid, secondary.uuid);
439
440                    self.current_mutated = Some(primary.uuid);
441
442                    for (tx1, tx2) in primary.tx_seq.iter().zip(secondary.tx_seq.iter()) {
443                        // chunks?
444                        let tx = if rng.random::<bool>() { tx1.clone() } else { tx2.clone() };
445                        new_seq.push(tx);
446                    }
447                }
448                MutationType::Prefix => {
449                    let corpus = if rng.random::<bool>() { primary } else { secondary };
450                    trace!(target: "corpus", "overwrite prefix of {}", corpus.uuid);
451
452                    self.current_mutated = Some(corpus.uuid);
453
454                    new_seq = corpus.tx_seq.clone();
455                    for i in 0..rng.random_range(0..=new_seq.len()) {
456                        new_seq[i] = self.new_tx(test_runner)?;
457                    }
458                }
459                MutationType::Suffix => {
460                    let corpus = if rng.random::<bool>() { primary } else { secondary };
461                    trace!(target: "corpus", "overwrite suffix of {}", corpus.uuid);
462
463                    self.current_mutated = Some(corpus.uuid);
464
465                    new_seq = corpus.tx_seq.clone();
466                    for i in new_seq.len() - rng.random_range(0..new_seq.len())..corpus.tx_seq.len()
467                    {
468                        new_seq[i] = self.new_tx(test_runner)?;
469                    }
470                }
471                MutationType::Abi => {
472                    let targets = test.targeted_contracts.targets.lock();
473                    let corpus = if rng.random::<bool>() { primary } else { secondary };
474                    trace!(target: "corpus", "ABI mutate args of {}", corpus.uuid);
475
476                    self.current_mutated = Some(corpus.uuid);
477
478                    new_seq = corpus.tx_seq.clone();
479
480                    let idx = rng.random_range(0..new_seq.len());
481                    let tx = new_seq.get_mut(idx).unwrap();
482                    if let (_, Some(function)) = targets.fuzzed_artifacts(tx) {
483                        // TODO add call_value to call details and mutate it as well as sender some
484                        // of the time
485                        if !function.inputs.is_empty() {
486                            let mut new_function = function.clone();
487                            let mut arg_mutation_rounds =
488                                rng.random_range(0..=function.inputs.len()).max(1);
489                            let round_arg_idx: Vec<usize> = if function.inputs.len() <= 1 {
490                                vec![0]
491                            } else {
492                                (0..arg_mutation_rounds)
493                                    .map(|_| {
494                                        test_runner.rng().random_range(0..function.inputs.len())
495                                    })
496                                    .collect()
497                            };
498                            // TODO mutation strategy for individual ABI types
499                            let mut prev_inputs = function
500                                .abi_decode_input(&tx.call_details.calldata[4..])
501                                .expect("fuzzed_artifacts returned wrong sig");
502                            // For now, only new inputs are generated, no existing inputs are
503                            // mutated.
504                            let max_fuzz_int = self.max_fuzz_int;
505                            let mut gen_input = |input: &alloy_json_abi::Param| {
506                                fuzz_param_from_state(
507                                    &input.selector_type().parse().unwrap(),
508                                    &test.fuzz_state,
509                                    max_fuzz_int,
510                                )
511                                .new_tree(test_runner)
512                                .expect("Could not generate case")
513                                .current()
514                            };
515
516                            while arg_mutation_rounds > 0 {
517                                let idx = round_arg_idx[arg_mutation_rounds - 1];
518                                let input = new_function
519                                    .inputs
520                                    .get_mut(idx)
521                                    .expect("Could not get input to mutate");
522                                let new_input = gen_input(input);
523                                prev_inputs[idx] = new_input;
524                                arg_mutation_rounds -= 1;
525                            }
526
527                            tx.call_details.calldata = new_function
528                                .abi_encode_input(&prev_inputs)
529                                .map_err(|e| eyre!(e.to_string()))?
530                                .into();
531                        }
532                    }
533                }
534            }
535        }
536
537        // Make sure sequence contains at least one tx to start fuzzing from.
538        if new_seq.is_empty() {
539            new_seq.push(self.new_tx(test_runner)?);
540        }
541        trace!(target: "corpus", "new sequence of {} calls generated", new_seq.len());
542
543        Ok(new_seq)
544    }
545
546    /// Returns the next call to be used in call sequence.
547    /// If coverage guided fuzzing is not configured or if previous input was discarded then this is
548    /// a new tx from strategy.
549    /// If running with coverage guided fuzzing it returns a new call only when sequence
550    /// does not have enough entries, or randomly. Otherwise, returns the next call from initial
551    /// sequence.
552    pub fn generate_next_input(
553        &mut self,
554        test: &InvariantTest,
555        sequence: &[BasicTxDetails],
556        discarded: bool,
557        depth: usize,
558    ) -> eyre::Result<BasicTxDetails> {
559        let test_runner = &mut test.execution_data.borrow_mut().branch_runner;
560
561        // Early return with new input if corpus dir / coverage guided fuzzing not configured or if
562        // call was discarded.
563        if self.corpus_dir.is_none() || discarded {
564            return self.new_tx(test_runner);
565        }
566
567        // When running with coverage guided fuzzing enabled then generate new sequence if initial
568        // sequence's length is less than depth or randomly, to occasionally intermix new txs.
569        if depth > sequence.len().saturating_sub(1) || test_runner.rng().random_ratio(1, 10) {
570            return self.new_tx(test_runner);
571        }
572
573        // Continue with the next call initial sequence
574        Ok(sequence[depth].clone())
575    }
576
577    /// Generates single call from invariant strategy.
578    pub fn new_tx(&mut self, test_runner: &mut TestRunner) -> eyre::Result<BasicTxDetails> {
579        Ok(self
580            .tx_generator
581            .new_tree(test_runner)
582            .map_err(|_| eyre!("Could not generate case"))?
583            .current())
584    }
585
586    /// Returns campaign failed replays.
587    pub fn failed_replays(self) -> usize {
588        self.failed_replays
589    }
590
591    /// Updates seen edges or features metrics.
592    pub fn update_seen_metrics(&mut self, is_edge: bool) {
593        self.metrics.update_seen(is_edge);
594    }
595}