wasmparser/
parser.rs

1use crate::CoreTypeSectionReader;
2use crate::{
3    limits::MAX_WASM_MODULE_SIZE, BinaryReader, BinaryReaderError, ComponentCanonicalSectionReader,
4    ComponentExportSectionReader, ComponentImportSectionReader, ComponentInstanceSectionReader,
5    ComponentStartFunction, ComponentTypeSectionReader, CustomSectionReader, DataSectionReader,
6    ElementSectionReader, ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader,
7    GlobalSectionReader, ImportSectionReader, InstanceSectionReader, MemorySectionReader, Result,
8    SectionLimited, TableSectionReader, TagSectionReader, TypeSectionReader,
9};
10use std::convert::TryInto;
11use std::fmt;
12use std::iter;
13use std::ops::Range;
14
/// Binary format version number for core WebAssembly modules.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;
16
/// Binary format version number for WebAssembly components.
///
/// Note that this started at `0xa` and we're incrementing up from there. When
/// the component model is stabilized this will become `0x1`. The changes so
/// far are:
///
/// * [????-??-??] 0xa - original version
/// * [2022-01-05] 0xb - `export` introduces an alias
/// * [2022-02-06] 0xc - `export` has an optional type ascribed to it
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xc;
24
/// The supported encoding formats for the parser.
///
/// The parser determines the encoding from the upper 16 bits of the version
/// word found in the binary's header.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Encoding {
    /// The encoding format is a WebAssembly module.
    Module,
    /// The encoding format is a WebAssembly component.
    Component,
}
33
34/// An incremental parser of a binary WebAssembly module or component.
35///
36/// This type is intended to be used to incrementally parse a WebAssembly module
37/// or component as bytes become available for the module. This can also be used
38/// to parse modules or components that are already entirely resident within memory.
39///
40/// This primary function for a parser is the [`Parser::parse`] function which
41/// will incrementally consume input. You can also use the [`Parser::parse_all`]
42/// function to parse a module or component that is entirely resident in memory.
43#[derive(Debug, Clone)]
44pub struct Parser {
45    state: State,
46    offset: u64,
47    max_size: u64,
48    encoding: Encoding,
49}
50
/// Internal state machine of a [`Parser`]: what is expected next in the input.
#[derive(Debug, Clone)]
enum State {
    /// Nothing has been parsed yet; the module/component header comes next.
    Header,
    /// The header has been parsed; a section id and length (or the end of the
    /// input) comes next.
    SectionStart,
    /// The parser is inside a code section.
    FunctionBody {
        /// Number of function bodies still expected from the code section.
        remaining: u32,
        /// Size, in bytes, of the code section contents not yet consumed.
        len: u32,
    },
}
57
58/// A successful return payload from [`Parser::parse`].
59///
60/// On success one of two possible values can be returned, either that more data
61/// is needed to continue parsing or a chunk of the input was parsed, indicating
62/// how much of it was parsed.
63#[derive(Debug)]
64pub enum Chunk<'a> {
65    /// This can be returned at any time and indicates that more data is needed
66    /// to proceed with parsing. Zero bytes were consumed from the input to
67    /// [`Parser::parse`]. The `usize` value here is a hint as to how many more
68    /// bytes are needed to continue parsing.
69    NeedMoreData(u64),
70
71    /// A chunk was successfully parsed.
72    Parsed {
73        /// This many bytes of the `data` input to [`Parser::parse`] were
74        /// consumed to produce `payload`.
75        consumed: usize,
76        /// The value that we actually parsed.
77        payload: Payload<'a>,
78    },
79}
80
81/// Values that can be parsed from a WebAssembly module or component.
82///
83/// This enumeration is all possible chunks of pieces that can be parsed by a
84/// [`Parser`] from a binary WebAssembly module or component. Note that for many
85/// sections the entire section is parsed all at once, whereas other functions,
86/// like the code section, are parsed incrementally. This is a distinction where some
87/// sections, like the type section, are required to be fully resident in memory
88/// (fully downloaded) before proceeding. Other sections, like the code section,
89/// can be processed in a streaming fashion where each function is extracted
90/// individually so it can possibly be shipped to another thread while you wait
91/// for more functions to get downloaded.
92///
93/// Note that payloads, when returned, do not indicate that the module or component
94/// is valid. For example when you receive a `Payload::TypeSection` the type
95/// section itself has not yet actually been parsed. The reader returned will be
96/// able to parse it, but you'll have to actually iterate the reader to do the
97/// full parse. Each payload returned is intended to be a *window* into the
98/// original `data` passed to [`Parser::parse`] which can be further processed
99/// if necessary.
100pub enum Payload<'a> {
101    /// Indicates the header of a WebAssembly module or component.
102    Version {
103        /// The version number found in the header.
104        num: u16,
105        /// The encoding format being parsed.
106        encoding: Encoding,
107        /// The range of bytes that were parsed to consume the header of the
108        /// module or component. Note that this range is relative to the start
109        /// of the byte stream.
110        range: Range<usize>,
111    },
112
113    /// A module type section was received and the provided reader can be
114    /// used to parse the contents of the type section.
115    TypeSection(TypeSectionReader<'a>),
116    /// A module import section was received and the provided reader can be
117    /// used to parse the contents of the import section.
118    ImportSection(ImportSectionReader<'a>),
119    /// A module function section was received and the provided reader can be
120    /// used to parse the contents of the function section.
121    FunctionSection(FunctionSectionReader<'a>),
122    /// A module table section was received and the provided reader can be
123    /// used to parse the contents of the table section.
124    TableSection(TableSectionReader<'a>),
125    /// A module memory section was received and the provided reader can be
126    /// used to parse the contents of the memory section.
127    MemorySection(MemorySectionReader<'a>),
128    /// A module tag section was received, and the provided reader can be
129    /// used to parse the contents of the tag section.
130    TagSection(TagSectionReader<'a>),
131    /// A module global section was received and the provided reader can be
132    /// used to parse the contents of the global section.
133    GlobalSection(GlobalSectionReader<'a>),
134    /// A module export section was received, and the provided reader can be
135    /// used to parse the contents of the export section.
136    ExportSection(ExportSectionReader<'a>),
137    /// A module start section was received.
138    StartSection {
139        /// The start function index
140        func: u32,
141        /// The range of bytes that specify the `func` field, specified in
142        /// offsets relative to the start of the byte stream.
143        range: Range<usize>,
144    },
145    /// A module element section was received and the provided reader can be
146    /// used to parse the contents of the element section.
147    ElementSection(ElementSectionReader<'a>),
148    /// A module data count section was received.
149    DataCountSection {
150        /// The number of data segments.
151        count: u32,
152        /// The range of bytes that specify the `count` field, specified in
153        /// offsets relative to the start of the byte stream.
154        range: Range<usize>,
155    },
156    /// A module data section was received and the provided reader can be
157    /// used to parse the contents of the data section.
158    DataSection(DataSectionReader<'a>),
159    /// Indicator of the start of the code section of a WebAssembly module.
160    ///
161    /// This entry is returned whenever the code section starts. The `count`
162    /// field indicates how many entries are in this code section. After
163    /// receiving this start marker you're guaranteed that the next `count`
164    /// items will be either `CodeSectionEntry` or an error will be returned.
165    ///
166    /// This, unlike other sections, is intended to be used for streaming the
167    /// contents of the code section. The code section is not required to be
168    /// fully resident in memory when we parse it. Instead a [`Parser`] is
169    /// capable of parsing piece-by-piece of a code section.
170    CodeSectionStart {
171        /// The number of functions in this section.
172        count: u32,
173        /// The range of bytes that represent this section, specified in
174        /// offsets relative to the start of the byte stream.
175        range: Range<usize>,
176        /// The size, in bytes, of the remaining contents of this section.
177        ///
178        /// This can be used in combination with [`Parser::skip_section`]
179        /// where the caller will know how many bytes to skip before feeding
180        /// bytes into `Parser` again.
181        size: u32,
182    },
183    /// An entry of the code section, a function, was parsed from a WebAssembly
184    /// module.
185    ///
186    /// This entry indicates that a function was successfully received from the
187    /// code section, and the payload here is the window into the original input
188    /// where the function resides. Note that the function itself has not been
189    /// parsed, it's only been outlined. You'll need to process the
190    /// `FunctionBody` provided to test whether it parses and/or is valid.
191    CodeSectionEntry(FunctionBody<'a>),
192
193    /// A core module section was received and the provided parser can be
194    /// used to parse the nested module.
195    ///
196    /// This variant is special in that it returns a sub-`Parser`. Upon
197    /// receiving a `ModuleSection` it is expected that the returned
198    /// `Parser` will be used instead of the parent `Parser` until the parse has
199    /// finished. You'll need to feed data into the `Parser` returned until it
200    /// returns `Payload::End`. After that you'll switch back to the parent
201    /// parser to resume parsing the rest of the current component.
202    ///
203    /// Note that binaries will not be parsed correctly if you feed the data for
204    /// a nested module into the parent [`Parser`].
205    ModuleSection {
206        /// The parser for the nested module.
207        parser: Parser,
208        /// The range of bytes that represent the nested module in the
209        /// original byte stream.
210        range: Range<usize>,
211    },
212    /// A core instance section was received and the provided parser can be
213    /// used to parse the contents of the core instance section.
214    ///
215    /// Currently this section is only parsed in a component.
216    InstanceSection(InstanceSectionReader<'a>),
217    /// A core type section was received and the provided parser can be
218    /// used to parse the contents of the core type section.
219    ///
220    /// Currently this section is only parsed in a component.
221    CoreTypeSection(CoreTypeSectionReader<'a>),
222    /// A component section from a WebAssembly component was received and the
223    /// provided parser can be used to parse the nested component.
224    ///
225    /// This variant is special in that it returns a sub-`Parser`. Upon
226    /// receiving a `ComponentSection` it is expected that the returned
227    /// `Parser` will be used instead of the parent `Parser` until the parse has
228    /// finished. You'll need to feed data into the `Parser` returned until it
229    /// returns `Payload::End`. After that you'll switch back to the parent
230    /// parser to resume parsing the rest of the current component.
231    ///
232    /// Note that binaries will not be parsed correctly if you feed the data for
233    /// a nested component into the parent [`Parser`].
234    ComponentSection {
235        /// The parser for the nested component.
236        parser: Parser,
237        /// The range of bytes that represent the nested component in the
238        /// original byte stream.
239        range: Range<usize>,
240    },
241    /// A component instance section was received and the provided reader can be
242    /// used to parse the contents of the component instance section.
243    ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
244    /// A component alias section was received and the provided reader can be
245    /// used to parse the contents of the component alias section.
246    ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
247    /// A component type section was received and the provided reader can be
248    /// used to parse the contents of the component type section.
249    ComponentTypeSection(ComponentTypeSectionReader<'a>),
250    /// A component canonical section was received and the provided reader can be
251    /// used to parse the contents of the component canonical section.
252    ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
253    /// A component start section was received.
254    ComponentStartSection {
255        /// The start function description.
256        start: ComponentStartFunction,
257        /// The range of bytes that specify the `start` field.
258        range: Range<usize>,
259    },
260    /// A component import section was received and the provided reader can be
261    /// used to parse the contents of the component import section.
262    ComponentImportSection(ComponentImportSectionReader<'a>),
263    /// A component export section was received, and the provided reader can be
264    /// used to parse the contents of the component export section.
265    ComponentExportSection(ComponentExportSectionReader<'a>),
266
267    /// A module or component custom section was received.
268    CustomSection(CustomSectionReader<'a>),
269
270    /// An unknown section was found.
271    ///
272    /// This variant is returned for all unknown sections encountered. This
273    /// likely wants to be interpreted as an error by consumers of the parser,
274    /// but this can also be used to parse sections currently unsupported by
275    /// the parser.
276    UnknownSection {
277        /// The 8-bit identifier for this section.
278        id: u8,
279        /// The contents of this section.
280        contents: &'a [u8],
281        /// The range of bytes, relative to the start of the original data
282        /// stream, that the contents of this section reside in.
283        range: Range<usize>,
284    },
285
286    /// The end of the WebAssembly module or component was reached.
287    ///
288    /// The value is the offset in the input byte stream where the end
289    /// was reached.
290    End(usize),
291}
292
// Section identifiers used when the binary is encoded as a core module
// (`Encoding::Module`).
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;
307
// Section identifiers used when the binary is encoded as a component
// (`Encoding::Component`). Id 0 is the custom section in both encodings, so
// the component-specific ids start at 1 and reuse the numeric values of the
// module ids with different meanings.
const COMPONENT_MODULE_SECTION: u8 = 1;
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
const COMPONENT_SECTION: u8 = 4;
const COMPONENT_INSTANCE_SECTION: u8 = 5;
const COMPONENT_ALIAS_SECTION: u8 = 6;
const COMPONENT_TYPE_SECTION: u8 = 7;
const COMPONENT_CANONICAL_SECTION: u8 = 8;
const COMPONENT_START_SECTION: u8 = 9;
const COMPONENT_IMPORT_SECTION: u8 = 10;
const COMPONENT_EXPORT_SECTION: u8 = 11;
319
320impl Parser {
321    /// Creates a new parser.
322    ///
323    /// Reports errors and ranges relative to `offset` provided, where `offset`
324    /// is some logical offset within the input stream that we're parsing.
325    pub fn new(offset: u64) -> Parser {
326        Parser {
327            state: State::Header,
328            offset,
329            max_size: u64::MAX,
330            // Assume the encoding is a module until we know otherwise
331            encoding: Encoding::Module,
332        }
333    }
334
335    /// Attempts to parse a chunk of data.
336    ///
337    /// This method will attempt to parse the next incremental portion of a
338    /// WebAssembly binary. Data available for the module or component is
339    /// provided as `data`, and the data can be incomplete if more data has yet
340    /// to arrive. The `eof` flag indicates whether more data will ever be received.
341    ///
342    /// There are two ways parsing can succeed with this method:
343    ///
344    /// * `Chunk::NeedMoreData` - this indicates that there is not enough bytes
345    ///   in `data` to parse a payload. The caller needs to wait for more data to
346    ///   be available in this situation before calling this method again. It is
347    ///   guaranteed that this is only returned if `eof` is `false`.
348    ///
349    /// * `Chunk::Parsed` - this indicates that a chunk of the input was
350    ///   successfully parsed. The payload is available in this variant of what
351    ///   was parsed, and this also indicates how many bytes of `data` was
352    ///   consumed. It's expected that the caller will not provide these bytes
353    ///   back to the [`Parser`] again.
354    ///
355    /// Note that all `Chunk` return values are connected, with a lifetime, to
356    /// the input buffer. Each parsed chunk borrows the input buffer and is a
357    /// view into it for successfully parsed chunks.
358    ///
359    /// It is expected that you'll call this method until `Payload::End` is
360    /// reached, at which point you're guaranteed that the parse has completed.
361    /// Note that complete parsing, for the top-level module or component,
362    /// implies that `data` is empty and `eof` is `true`.
363    ///
364    /// # Errors
365    ///
366    /// Parse errors are returned as an `Err`. Errors can happen when the
367    /// structure of the data is unexpected or if sections are too large for
368    /// example. Note that errors are not returned for malformed *contents* of
369    /// sections here. Sections are generally not individually parsed and each
370    /// returned [`Payload`] needs to be iterated over further to detect all
371    /// errors.
372    ///
373    /// # Examples
374    ///
375    /// An example of reading a wasm file from a stream (`std::io::Read`) and
376    /// incrementally parsing it.
377    ///
378    /// ```
379    /// use std::io::Read;
380    /// use anyhow::Result;
381    /// use wasmparser::{Parser, Chunk, Payload::*};
382    ///
383    /// fn parse(mut reader: impl Read) -> Result<()> {
384    ///     let mut buf = Vec::new();
385    ///     let mut parser = Parser::new(0);
386    ///     let mut eof = false;
387    ///     let mut stack = Vec::new();
388    ///
389    ///     loop {
390    ///         let (payload, consumed) = match parser.parse(&buf, eof)? {
391    ///             Chunk::NeedMoreData(hint) => {
392    ///                 assert!(!eof); // otherwise an error would be returned
393    ///
394    ///                 // Use the hint to preallocate more space, then read
395    ///                 // some more data into our buffer.
396    ///                 //
397    ///                 // Note that the buffer management here is not ideal,
398    ///                 // but it's compact enough to fit in an example!
399    ///                 let len = buf.len();
400    ///                 buf.extend((0..hint).map(|_| 0u8));
401    ///                 let n = reader.read(&mut buf[len..])?;
402    ///                 buf.truncate(len + n);
403    ///                 eof = n == 0;
404    ///                 continue;
405    ///             }
406    ///
407    ///             Chunk::Parsed { consumed, payload } => (payload, consumed),
408    ///         };
409    ///
410    ///         match payload {
411    ///             // Sections for WebAssembly modules
412    ///             Version { .. } => { /* ... */ }
413    ///             TypeSection(_) => { /* ... */ }
414    ///             ImportSection(_) => { /* ... */ }
415    ///             FunctionSection(_) => { /* ... */ }
416    ///             TableSection(_) => { /* ... */ }
417    ///             MemorySection(_) => { /* ... */ }
418    ///             TagSection(_) => { /* ... */ }
419    ///             GlobalSection(_) => { /* ... */ }
420    ///             ExportSection(_) => { /* ... */ }
421    ///             StartSection { .. } => { /* ... */ }
422    ///             ElementSection(_) => { /* ... */ }
423    ///             DataCountSection { .. } => { /* ... */ }
424    ///             DataSection(_) => { /* ... */ }
425    ///
426    ///             // Here we know how many functions we'll be receiving as
427    ///             // `CodeSectionEntry`, so we can prepare for that, and
428    ///             // afterwards we can parse and handle each function
429    ///             // individually.
430    ///             CodeSectionStart { .. } => { /* ... */ }
431    ///             CodeSectionEntry(body) => {
432    ///                 // here we can iterate over `body` to parse the function
433    ///                 // and its locals
434    ///             }
435    ///
436    ///             // Sections for WebAssembly components
437    ///             ModuleSection { .. } => { /* ... */ }
438    ///             InstanceSection(_) => { /* ... */ }
439    ///             CoreTypeSection(_) => { /* ... */ }
440    ///             ComponentSection { .. } => { /* ... */ }
441    ///             ComponentInstanceSection(_) => { /* ... */ }
442    ///             ComponentAliasSection(_) => { /* ... */ }
443    ///             ComponentTypeSection(_) => { /* ... */ }
444    ///             ComponentCanonicalSection(_) => { /* ... */ }
445    ///             ComponentStartSection { .. } => { /* ... */ }
446    ///             ComponentImportSection(_) => { /* ... */ }
447    ///             ComponentExportSection(_) => { /* ... */ }
448    ///
449    ///             CustomSection(_) => { /* ... */ }
450    ///
451    ///             // most likely you'd return an error here
452    ///             UnknownSection { id, .. } => { /* ... */ }
453    ///
454    ///             // Once we've reached the end of a parser we either resume
455    ///             // at the parent parser or we break out of the loop because
456    ///             // we're done.
457    ///             End(_) => {
458    ///                 if let Some(parent_parser) = stack.pop() {
459    ///                     parser = parent_parser;
460    ///                 } else {
461    ///                     break;
462    ///                 }
463    ///             }
464    ///         }
465    ///
466    ///         // once we're done processing the payload we can forget the
467    ///         // original.
468    ///         buf.drain(..consumed);
469    ///     }
470    ///
471    ///     Ok(())
472    /// }
473    ///
474    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
475    /// ```
476    pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
477        let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
478            (&data[..(self.max_size as usize)], true)
479        } else {
480            (data, eof)
481        };
482        // TODO: thread through `offset: u64` to `BinaryReader`, remove
483        // the cast here.
484        let mut reader = BinaryReader::new_with_offset(data, self.offset as usize);
485        match self.parse_reader(&mut reader, eof) {
486            Ok(payload) => {
487                // Be sure to update our offset with how far we got in the
488                // reader
489                self.offset += usize_to_u64(reader.position);
490                self.max_size -= usize_to_u64(reader.position);
491                Ok(Chunk::Parsed {
492                    consumed: reader.position,
493                    payload,
494                })
495            }
496            Err(e) => {
497                // If we're at EOF then there's no way we can recover from any
498                // error, so continue to propagate it.
499                if eof {
500                    return Err(e);
501                }
502
503                // If our error doesn't look like it can be resolved with more
504                // data being pulled down, then propagate it, otherwise switch
505                // the error to "feed me please"
506                match e.inner.needed_hint {
507                    Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
508                    None => Err(e),
509                }
510            }
511        }
512    }
513
514    fn parse_reader<'a>(
515        &mut self,
516        reader: &mut BinaryReader<'a>,
517        eof: bool,
518    ) -> Result<Payload<'a>> {
519        use Payload::*;
520
521        match self.state {
522            State::Header => {
523                const KIND_MODULE: u16 = 0x00;
524                const KIND_COMPONENT: u16 = 0x01;
525
526                let start = reader.original_position();
527                let header_version = reader.read_header_version()?;
528                self.encoding = match (header_version >> 16) as u16 {
529                    KIND_MODULE => Encoding::Module,
530                    KIND_COMPONENT => Encoding::Component,
531                    _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
532                };
533                let num = header_version as u16;
534                self.state = State::SectionStart;
535                Ok(Version {
536                    num,
537                    encoding: self.encoding,
538                    range: start..reader.original_position(),
539                })
540            }
541            State::SectionStart => {
542                // If we're at eof and there are no bytes in our buffer, then
543                // that means we reached the end of the data since it's
544                // just a bunch of sections concatenated after the header.
545                if eof && reader.bytes_remaining() == 0 {
546                    return Ok(Payload::End(reader.original_position()));
547                }
548
549                let id_pos = reader.position;
550                let id = reader.read_u8()?;
551                if id & 0x80 != 0 {
552                    return Err(BinaryReaderError::new("malformed section id", id_pos));
553                }
554                let len_pos = reader.original_position();
555                let mut len = reader.read_var_u32()?;
556
557                // Test to make sure that this section actually fits within
558                // `Parser::max_size`. This doesn't matter for top-level modules
559                // but it is required for nested modules/components to correctly ensure
560                // that all sections live entirely within their section of the
561                // file.
562                let section_overflow = self
563                    .max_size
564                    .checked_sub(usize_to_u64(reader.position))
565                    .and_then(|s| s.checked_sub(len.into()))
566                    .is_none();
567                if section_overflow {
568                    return Err(BinaryReaderError::new("section too large", len_pos));
569                }
570
571                match (self.encoding, id) {
572                    // Sections for both modules and components.
573                    (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
574
575                    // Module sections
576                    (Encoding::Module, TYPE_SECTION) => {
577                        section(reader, len, TypeSectionReader::new, TypeSection)
578                    }
579                    (Encoding::Module, IMPORT_SECTION) => {
580                        section(reader, len, ImportSectionReader::new, ImportSection)
581                    }
582                    (Encoding::Module, FUNCTION_SECTION) => {
583                        section(reader, len, FunctionSectionReader::new, FunctionSection)
584                    }
585                    (Encoding::Module, TABLE_SECTION) => {
586                        section(reader, len, TableSectionReader::new, TableSection)
587                    }
588                    (Encoding::Module, MEMORY_SECTION) => {
589                        section(reader, len, MemorySectionReader::new, MemorySection)
590                    }
591                    (Encoding::Module, GLOBAL_SECTION) => {
592                        section(reader, len, GlobalSectionReader::new, GlobalSection)
593                    }
594                    (Encoding::Module, EXPORT_SECTION) => {
595                        section(reader, len, ExportSectionReader::new, ExportSection)
596                    }
597                    (Encoding::Module, START_SECTION) => {
598                        let (func, range) = single_item(reader, len, "start")?;
599                        Ok(StartSection { func, range })
600                    }
601                    (Encoding::Module, ELEMENT_SECTION) => {
602                        section(reader, len, ElementSectionReader::new, ElementSection)
603                    }
604                    (Encoding::Module, CODE_SECTION) => {
605                        let start = reader.original_position();
606                        let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
607                        let range = start..reader.original_position() + len as usize;
608                        self.state = State::FunctionBody {
609                            remaining: count,
610                            len,
611                        };
612                        Ok(CodeSectionStart {
613                            count,
614                            range,
615                            size: len,
616                        })
617                    }
618                    (Encoding::Module, DATA_SECTION) => {
619                        section(reader, len, DataSectionReader::new, DataSection)
620                    }
621                    (Encoding::Module, DATA_COUNT_SECTION) => {
622                        let (count, range) = single_item(reader, len, "data count")?;
623                        Ok(DataCountSection { count, range })
624                    }
625                    (Encoding::Module, TAG_SECTION) => {
626                        section(reader, len, TagSectionReader::new, TagSection)
627                    }
628
629                    // Component sections
630                    (Encoding::Component, COMPONENT_MODULE_SECTION)
631                    | (Encoding::Component, COMPONENT_SECTION) => {
632                        if len as usize > MAX_WASM_MODULE_SIZE {
633                            bail!(
634                                len_pos,
635                                "{} section is too large",
636                                if id == 1 { "module" } else { "component " }
637                            );
638                        }
639
640                        let range =
641                            reader.original_position()..reader.original_position() + len as usize;
642                        self.max_size -= u64::from(len);
643                        self.offset += u64::from(len);
644                        let mut parser = Parser::new(usize_to_u64(reader.original_position()));
645                        parser.max_size = len.into();
646
647                        Ok(match id {
648                            1 => ModuleSection { parser, range },
649                            4 => ComponentSection { parser, range },
650                            _ => unreachable!(),
651                        })
652                    }
653                    (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
654                        section(reader, len, InstanceSectionReader::new, InstanceSection)
655                    }
656                    (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
657                        section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
658                    }
659                    (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
660                        reader,
661                        len,
662                        ComponentInstanceSectionReader::new,
663                        ComponentInstanceSection,
664                    ),
665                    (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
666                        section(reader, len, SectionLimited::new, ComponentAliasSection)
667                    }
668                    (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
669                        reader,
670                        len,
671                        ComponentTypeSectionReader::new,
672                        ComponentTypeSection,
673                    ),
674                    (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
675                        reader,
676                        len,
677                        ComponentCanonicalSectionReader::new,
678                        ComponentCanonicalSection,
679                    ),
680                    (Encoding::Component, COMPONENT_START_SECTION) => {
681                        let (start, range) = single_item(reader, len, "component start")?;
682                        Ok(ComponentStartSection { start, range })
683                    }
684                    (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
685                        reader,
686                        len,
687                        ComponentImportSectionReader::new,
688                        ComponentImportSection,
689                    ),
690                    (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
691                        reader,
692                        len,
693                        ComponentExportSectionReader::new,
694                        ComponentExportSection,
695                    ),
696                    (_, id) => {
697                        let offset = reader.original_position();
698                        let contents = reader.read_bytes(len as usize)?;
699                        let range = offset..offset + len as usize;
700                        Ok(UnknownSection {
701                            id,
702                            contents,
703                            range,
704                        })
705                    }
706                }
707            }
708
709            // Once we hit 0 remaining incrementally parsed items, with 0
710            // remaining bytes in each section, we're done and can switch back
711            // to parsing sections.
712            State::FunctionBody {
713                remaining: 0,
714                len: 0,
715            } => {
716                self.state = State::SectionStart;
717                self.parse_reader(reader, eof)
718            }
719
720            // ... otherwise trailing bytes with no remaining entries in these
721            // sections indicates an error.
722            State::FunctionBody { remaining: 0, len } => {
723                debug_assert!(len > 0);
724                let offset = reader.original_position();
725                Err(BinaryReaderError::new(
726                    "trailing bytes at end of section",
727                    offset,
728                ))
729            }
730
731            // Functions are relatively easy to parse when we know there's at
732            // least one remaining and at least one byte available to read
733            // things.
734            //
735            // We use the remaining length try to read a u32 size of the
736            // function, and using that size we require the entire function be
737            // resident in memory. This means that we're reading whole chunks of
738            // functions at a time.
739            //
740            // Limiting via `Parser::max_size` (nested parsing) happens above in
741            // `fn parse`, and limiting by our section size happens via
742            // `delimited`. Actual parsing of the function body is delegated to
743            // the caller to iterate over the `FunctionBody` structure.
744            State::FunctionBody { remaining, mut len } => {
745                let body = delimited(reader, &mut len, |r| {
746                    let size = r.read_var_u32()?;
747                    let offset = r.original_position();
748                    Ok(FunctionBody::new(offset, r.read_bytes(size as usize)?))
749                })?;
750                self.state = State::FunctionBody {
751                    remaining: remaining - 1,
752                    len,
753                };
754                Ok(CodeSectionEntry(body))
755            }
756        }
757    }
758
759    /// Convenience function that can be used to parse a module or component
760    /// that is entirely resident in memory.
761    ///
762    /// This function will parse the `data` provided as a WebAssembly module
763    /// or component.
764    ///
765    /// Note that when this function yields sections that provide parsers,
766    /// no further action is required for those sections as payloads from
767    /// those parsers will be automatically returned.
768    pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {
769        let mut stack = Vec::new();
770        let mut cur = self;
771        let mut done = false;
772        iter::from_fn(move || {
773            if done {
774                return None;
775            }
776            let payload = match cur.parse(data, true) {
777                // Propagate all errors
778                Err(e) => {
779                    done = true;
780                    return Some(Err(e));
781                }
782
783                // This isn't possible because `eof` is always true.
784                Ok(Chunk::NeedMoreData(_)) => unreachable!(),
785
786                Ok(Chunk::Parsed { payload, consumed }) => {
787                    data = &data[consumed..];
788                    payload
789                }
790            };
791
792            match &payload {
793                Payload::ModuleSection { parser, .. }
794                | Payload::ComponentSection { parser, .. } => {
795                    stack.push(cur.clone());
796                    cur = parser.clone();
797                }
798                Payload::End(_) => match stack.pop() {
799                    Some(p) => cur = p,
800                    None => done = true,
801                },
802
803                _ => {}
804            }
805
806            Some(Ok(payload))
807        })
808    }
809
810    /// Skip parsing the code section entirely.
811    ///
812    /// This function can be used to indicate, after receiving
813    /// `CodeSectionStart`, that the section will not be parsed.
814    ///
815    /// The caller will be responsible for skipping `size` bytes (found in the
816    /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
817    /// after the `size` bytes have been skipped.
818    ///
819    /// # Panics
820    ///
821    /// This function will panic if the parser is not in a state where it's
822    /// parsing the code section.
823    ///
824    /// # Examples
825    ///
826    /// ```
827    /// use wasmparser::{Result, Parser, Chunk, Payload::*};
828    /// use std::ops::Range;
829    ///
830    /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
831    ///     let mut parser = Parser::new(0);
832    ///     loop {
833    ///         let payload = match parser.parse(wasm, true)? {
834    ///             Chunk::Parsed { consumed, payload } => {
835    ///                 wasm = &wasm[consumed..];
836    ///                 payload
837    ///             }
838    ///             // this state isn't possible with `eof = true`
839    ///             Chunk::NeedMoreData(_) => unreachable!(),
840    ///         };
841    ///         match payload {
842    ///             TypeSection(s) => print_range("type section", &s.range()),
843    ///             ImportSection(s) => print_range("import section", &s.range()),
844    ///             // .. other sections
845    ///
846    ///             // Print the range of the code section we see, but don't
847    ///             // actually iterate over each individual function.
848    ///             CodeSectionStart { range, size, .. } => {
849    ///                 print_range("code section", &range);
850    ///                 parser.skip_section();
851    ///                 wasm = &wasm[size as usize..];
852    ///             }
853    ///             End(_) => break,
854    ///             _ => {}
855    ///         }
856    ///     }
857    ///     Ok(())
858    /// }
859    ///
860    /// fn print_range(section: &str, range: &Range<usize>) {
861    ///     println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
862    /// }
863    /// ```
864    pub fn skip_section(&mut self) {
865        let skip = match self.state {
866            State::FunctionBody { remaining: _, len } => len,
867            _ => panic!("wrong state to call `skip_section`"),
868        };
869        self.offset += u64::from(skip);
870        self.max_size -= u64::from(skip);
871        self.state = State::SectionStart;
872    }
873}
874
/// Converts a `usize` into a `u64`, panicking if the value does not fit.
fn usize_to_u64(a: usize) -> u64 {
    let widened: Result<u64, _> = a.try_into();
    widened.unwrap()
}
878
879/// Parses an entire section resident in memory into a `Payload`.
880///
881/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
882/// to construct the section to return.
883fn section<'a, T>(
884    reader: &mut BinaryReader<'a>,
885    len: u32,
886    ctor: fn(&'a [u8], usize) -> Result<T>,
887    variant: fn(T) -> Payload<'a>,
888) -> Result<Payload<'a>> {
889    let offset = reader.original_position();
890    let payload = reader.read_bytes(len as usize)?;
891    // clear the hint for "need this many more bytes" here because we already
892    // read all the bytes, so it's not possible to read more bytes if this
893    // fails.
894    let reader = ctor(payload, offset).map_err(clear_hint)?;
895    Ok(variant(reader))
896}
897
898/// Reads a section that is represented by a single uleb-encoded `u32`.
899fn single_item<'a, T>(
900    reader: &mut BinaryReader<'a>,
901    len: u32,
902    desc: &str,
903) -> Result<(T, Range<usize>)>
904where
905    T: FromReader<'a>,
906{
907    let range = reader.original_position()..reader.original_position() + len as usize;
908    let mut content = BinaryReader::new_with_offset(reader.read_bytes(len as usize)?, range.start);
909    // We can't recover from "unexpected eof" here because our entire section is
910    // already resident in memory, so clear the hint for how many more bytes are
911    // expected.
912    let ret = content.read().map_err(clear_hint)?;
913    if !content.eof() {
914        bail!(
915            content.original_position(),
916            "unexpected content in the {desc} section",
917        );
918    }
919    Ok((ret, range))
920}
921
922/// Attempts to parse using `f`.
923///
924/// This will update `*len` with the number of bytes consumed, and it will cause
925/// a failure to be returned instead of the number of bytes consumed exceeds
926/// what `*len` currently is.
927fn delimited<'a, T>(
928    reader: &mut BinaryReader<'a>,
929    len: &mut u32,
930    f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
931) -> Result<T> {
932    let start = reader.position;
933    let ret = f(reader)?;
934    *len = match (reader.position - start)
935        .try_into()
936        .ok()
937        .and_then(|i| len.checked_sub(i))
938    {
939        Some(i) => i,
940        None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
941    };
942    Ok(ret)
943}
944
945impl Default for Parser {
946    fn default() -> Parser {
947        Parser::new(0)
948    }
949}
950
951impl Payload<'_> {
952    /// If this `Payload` represents a section in the original wasm module then
953    /// the section's id and range within the original wasm binary are returned.
954    ///
955    /// Not all payloads refer to entire sections, such as the `Version` and
956    /// `CodeSectionEntry` variants. These variants will return `None` from this
957    /// function.
958    ///
959    /// Otherwise this function will return `Some` where the first element is
960    /// the byte identifier for the section and the second element is the range
961    /// of the contents of the section within the original wasm binary.
962    ///
963    /// The purpose of this method is to enable tools to easily iterate over
964    /// entire sections if necessary and handle sections uniformly, for example
965    /// dropping custom sections while preserving all other sections.
966    pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
967        use Payload::*;
968
969        match self {
970            Version { .. } => None,
971            TypeSection(s) => Some((TYPE_SECTION, s.range())),
972            ImportSection(s) => Some((IMPORT_SECTION, s.range())),
973            FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
974            TableSection(s) => Some((TABLE_SECTION, s.range())),
975            MemorySection(s) => Some((MEMORY_SECTION, s.range())),
976            TagSection(s) => Some((TAG_SECTION, s.range())),
977            GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
978            ExportSection(s) => Some((EXPORT_SECTION, s.range())),
979            ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
980            DataSection(s) => Some((DATA_SECTION, s.range())),
981            StartSection { range, .. } => Some((START_SECTION, range.clone())),
982            DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
983            CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
984            CodeSectionEntry(_) => None,
985
986            ModuleSection { range, .. } => Some((COMPONENT_MODULE_SECTION, range.clone())),
987            InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
988            CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
989            ComponentSection { range, .. } => Some((COMPONENT_SECTION, range.clone())),
990            ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
991            ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
992            ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
993            ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
994            ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
995            ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
996            ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
997
998            CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
999
1000            UnknownSection { id, range, .. } => Some((*id, range.clone())),
1001
1002            End(_) => None,
1003        }
1004    }
1005}
1006
1007impl fmt::Debug for Payload<'_> {
1008    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1009        use Payload::*;
1010        match self {
1011            Version {
1012                num,
1013                encoding,
1014                range,
1015            } => f
1016                .debug_struct("Version")
1017                .field("num", num)
1018                .field("encoding", encoding)
1019                .field("range", range)
1020                .finish(),
1021
1022            // Module sections
1023            TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1024            ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1025            FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1026            TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1027            MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1028            TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1029            GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1030            ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1031            ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1032            DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1033            StartSection { func, range } => f
1034                .debug_struct("StartSection")
1035                .field("func", func)
1036                .field("range", range)
1037                .finish(),
1038            DataCountSection { count, range } => f
1039                .debug_struct("DataCountSection")
1040                .field("count", count)
1041                .field("range", range)
1042                .finish(),
1043            CodeSectionStart { count, range, size } => f
1044                .debug_struct("CodeSectionStart")
1045                .field("count", count)
1046                .field("range", range)
1047                .field("size", size)
1048                .finish(),
1049            CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1050
1051            // Component sections
1052            ModuleSection { parser: _, range } => f
1053                .debug_struct("ModuleSection")
1054                .field("range", range)
1055                .finish(),
1056            InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1057            CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1058            ComponentSection { parser: _, range } => f
1059                .debug_struct("ComponentSection")
1060                .field("range", range)
1061                .finish(),
1062            ComponentInstanceSection(_) => f
1063                .debug_tuple("ComponentInstanceSection")
1064                .field(&"...")
1065                .finish(),
1066            ComponentAliasSection(_) => f
1067                .debug_tuple("ComponentAliasSection")
1068                .field(&"...")
1069                .finish(),
1070            ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1071            ComponentCanonicalSection(_) => f
1072                .debug_tuple("ComponentCanonicalSection")
1073                .field(&"...")
1074                .finish(),
1075            ComponentStartSection { .. } => f
1076                .debug_tuple("ComponentStartSection")
1077                .field(&"...")
1078                .finish(),
1079            ComponentImportSection(_) => f
1080                .debug_tuple("ComponentImportSection")
1081                .field(&"...")
1082                .finish(),
1083            ComponentExportSection(_) => f
1084                .debug_tuple("ComponentExportSection")
1085                .field(&"...")
1086                .finish(),
1087
1088            CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1089
1090            UnknownSection { id, range, .. } => f
1091                .debug_struct("UnknownSection")
1092                .field("id", id)
1093                .field("range", range)
1094                .finish(),
1095
1096            End(offset) => f.debug_tuple("End").field(offset).finish(),
1097        }
1098    }
1099}
1100
1101fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1102    err.inner.needed_hint = None;
1103    err
1104}
1105
1106#[cfg(test)]
1107mod tests {
1108    use super::*;
1109
1110    macro_rules! assert_matches {
1111        ($a:expr, $b:pat $(,)?) => {
1112            match $a {
1113                $b => {}
1114                a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
1115            }
1116        };
1117    }
1118
1119    #[test]
1120    fn header() {
1121        assert!(Parser::default().parse(&[], true).is_err());
1122        assert_matches!(
1123            Parser::default().parse(&[], false),
1124            Ok(Chunk::NeedMoreData(4)),
1125        );
1126        assert_matches!(
1127            Parser::default().parse(b"\0", false),
1128            Ok(Chunk::NeedMoreData(3)),
1129        );
1130        assert_matches!(
1131            Parser::default().parse(b"\0asm", false),
1132            Ok(Chunk::NeedMoreData(4)),
1133        );
1134        assert_matches!(
1135            Parser::default().parse(b"\0asm\x01\0\0\0", false),
1136            Ok(Chunk::Parsed {
1137                consumed: 8,
1138                payload: Payload::Version { num: 1, .. },
1139            }),
1140        );
1141    }
1142
1143    #[test]
1144    fn header_iter() {
1145        for _ in Parser::default().parse_all(&[]) {}
1146        for _ in Parser::default().parse_all(b"\0") {}
1147        for _ in Parser::default().parse_all(b"\0asm") {}
1148        for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
1149    }
1150
1151    fn parser_after_header() -> Parser {
1152        let mut p = Parser::default();
1153        assert_matches!(
1154            p.parse(b"\0asm\x01\0\0\0", false),
1155            Ok(Chunk::Parsed {
1156                consumed: 8,
1157                payload: Payload::Version {
1158                    num: WASM_MODULE_VERSION,
1159                    encoding: Encoding::Module,
1160                    ..
1161                },
1162            }),
1163        );
1164        p
1165    }
1166
1167    fn parser_after_component_header() -> Parser {
1168        let mut p = Parser::default();
1169        assert_matches!(
1170            p.parse(b"\0asm\x0c\0\x01\0", false),
1171            Ok(Chunk::Parsed {
1172                consumed: 8,
1173                payload: Payload::Version {
1174                    num: WASM_COMPONENT_VERSION,
1175                    encoding: Encoding::Component,
1176                    ..
1177                },
1178            }),
1179        );
1180        p
1181    }
1182
1183    #[test]
1184    fn start_section() {
1185        assert_matches!(
1186            parser_after_header().parse(&[], false),
1187            Ok(Chunk::NeedMoreData(1)),
1188        );
1189        assert!(parser_after_header().parse(&[8], true).is_err());
1190        assert!(parser_after_header().parse(&[8, 1], true).is_err());
1191        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1192        assert_matches!(
1193            parser_after_header().parse(&[8], false),
1194            Ok(Chunk::NeedMoreData(1)),
1195        );
1196        assert_matches!(
1197            parser_after_header().parse(&[8, 1], false),
1198            Ok(Chunk::NeedMoreData(1)),
1199        );
1200        assert_matches!(
1201            parser_after_header().parse(&[8, 2], false),
1202            Ok(Chunk::NeedMoreData(2)),
1203        );
1204        assert_matches!(
1205            parser_after_header().parse(&[8, 1, 1], false),
1206            Ok(Chunk::Parsed {
1207                consumed: 3,
1208                payload: Payload::StartSection { func: 1, .. },
1209            }),
1210        );
1211        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
1212        assert!(parser_after_header().parse(&[8, 0], false).is_err());
1213    }
1214
1215    #[test]
1216    fn end_works() {
1217        assert_matches!(
1218            parser_after_header().parse(&[], true),
1219            Ok(Chunk::Parsed {
1220                consumed: 0,
1221                payload: Payload::End(8),
1222            }),
1223        );
1224    }
1225
1226    #[test]
1227    fn type_section() {
1228        assert!(parser_after_header().parse(&[1], true).is_err());
1229        assert!(parser_after_header().parse(&[1, 0], false).is_err());
1230        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1231        assert_matches!(
1232            parser_after_header().parse(&[1], false),
1233            Ok(Chunk::NeedMoreData(1)),
1234        );
1235        assert_matches!(
1236            parser_after_header().parse(&[1, 1], false),
1237            Ok(Chunk::NeedMoreData(1)),
1238        );
1239        assert_matches!(
1240            parser_after_header().parse(&[1, 1, 1], false),
1241            Ok(Chunk::Parsed {
1242                consumed: 3,
1243                payload: Payload::TypeSection(_),
1244            }),
1245        );
1246        assert_matches!(
1247            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
1248            Ok(Chunk::Parsed {
1249                consumed: 3,
1250                payload: Payload::TypeSection(_),
1251            }),
1252        );
1253    }
1254
1255    #[test]
1256    fn custom_section() {
1257        assert!(parser_after_header().parse(&[0], true).is_err());
1258        assert!(parser_after_header().parse(&[0, 0], false).is_err());
1259        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
1260        assert_matches!(
1261            parser_after_header().parse(&[0, 2, 1], false),
1262            Ok(Chunk::NeedMoreData(1)),
1263        );
1264        assert_matches!(
1265            parser_after_header().parse(&[0, 1, 0], false),
1266            Ok(Chunk::Parsed {
1267                consumed: 3,
1268                payload: Payload::CustomSection(CustomSectionReader {
1269                    name: "",
1270                    data_offset: 11,
1271                    data: b"",
1272                    range: Range { start: 10, end: 11 },
1273                }),
1274            }),
1275        );
1276        assert_matches!(
1277            parser_after_header().parse(&[0, 2, 1, b'a'], false),
1278            Ok(Chunk::Parsed {
1279                consumed: 4,
1280                payload: Payload::CustomSection(CustomSectionReader {
1281                    name: "a",
1282                    data_offset: 12,
1283                    data: b"",
1284                    range: Range { start: 10, end: 12 },
1285                }),
1286            }),
1287        );
1288        assert_matches!(
1289            parser_after_header().parse(&[0, 2, 0, b'a'], false),
1290            Ok(Chunk::Parsed {
1291                consumed: 4,
1292                payload: Payload::CustomSection(CustomSectionReader {
1293                    name: "",
1294                    data_offset: 11,
1295                    data: b"a",
1296                    range: Range { start: 10, end: 12 },
1297                }),
1298            }),
1299        );
1300    }
1301
1302    #[test]
1303    fn function_section() {
1304        assert!(parser_after_header().parse(&[10], true).is_err());
1305        assert!(parser_after_header().parse(&[10, 0], true).is_err());
1306        assert!(parser_after_header().parse(&[10, 1], true).is_err());
1307        assert_matches!(
1308            parser_after_header().parse(&[10], false),
1309            Ok(Chunk::NeedMoreData(1))
1310        );
1311        assert_matches!(
1312            parser_after_header().parse(&[10, 1], false),
1313            Ok(Chunk::NeedMoreData(1))
1314        );
1315        let mut p = parser_after_header();
1316        assert_matches!(
1317            p.parse(&[10, 1, 0], false),
1318            Ok(Chunk::Parsed {
1319                consumed: 3,
1320                payload: Payload::CodeSectionStart { count: 0, .. },
1321            }),
1322        );
1323        assert_matches!(
1324            p.parse(&[], true),
1325            Ok(Chunk::Parsed {
1326                consumed: 0,
1327                payload: Payload::End(11),
1328            }),
1329        );
1330        let mut p = parser_after_header();
1331        assert_matches!(
1332            p.parse(&[10, 2, 1, 0], false),
1333            Ok(Chunk::Parsed {
1334                consumed: 3,
1335                payload: Payload::CodeSectionStart { count: 1, .. },
1336            }),
1337        );
1338        assert_matches!(
1339            p.parse(&[0], false),
1340            Ok(Chunk::Parsed {
1341                consumed: 1,
1342                payload: Payload::CodeSectionEntry(_),
1343            }),
1344        );
1345        assert_matches!(
1346            p.parse(&[], true),
1347            Ok(Chunk::Parsed {
1348                consumed: 0,
1349                payload: Payload::End(12),
1350            }),
1351        );
1352
1353        // 1 byte section with 1 function can't read the function body because
1354        // the section is too small
1355        let mut p = parser_after_header();
1356        assert_matches!(
1357            p.parse(&[10, 1, 1], false),
1358            Ok(Chunk::Parsed {
1359                consumed: 3,
1360                payload: Payload::CodeSectionStart { count: 1, .. },
1361            }),
1362        );
1363        assert_eq!(
1364            p.parse(&[0], false).unwrap_err().message(),
1365            "unexpected end-of-file"
1366        );
1367
1368        // section with 2 functions but section is cut off
1369        let mut p = parser_after_header();
1370        assert_matches!(
1371            p.parse(&[10, 2, 2], false),
1372            Ok(Chunk::Parsed {
1373                consumed: 3,
1374                payload: Payload::CodeSectionStart { count: 2, .. },
1375            }),
1376        );
1377        assert_matches!(
1378            p.parse(&[0], false),
1379            Ok(Chunk::Parsed {
1380                consumed: 1,
1381                payload: Payload::CodeSectionEntry(_),
1382            }),
1383        );
1384        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1385        assert_eq!(
1386            p.parse(&[0], false).unwrap_err().message(),
1387            "unexpected end-of-file",
1388        );
1389
1390        // trailing data is bad
1391        let mut p = parser_after_header();
1392        assert_matches!(
1393            p.parse(&[10, 3, 1], false),
1394            Ok(Chunk::Parsed {
1395                consumed: 3,
1396                payload: Payload::CodeSectionStart { count: 1, .. },
1397            }),
1398        );
1399        assert_matches!(
1400            p.parse(&[0], false),
1401            Ok(Chunk::Parsed {
1402                consumed: 1,
1403                payload: Payload::CodeSectionEntry(_),
1404            }),
1405        );
1406        assert_eq!(
1407            p.parse(&[0], false).unwrap_err().message(),
1408            "trailing bytes at end of section",
1409        );
1410    }
1411
1412    #[test]
1413    fn single_module() {
1414        let mut p = parser_after_component_header();
1415        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));
1416
1417        // A module that's 8 bytes in length
1418        let mut sub = match p.parse(&[1, 8], false) {
1419            Ok(Chunk::Parsed {
1420                consumed: 2,
1421                payload: Payload::ModuleSection { parser, .. },
1422            }) => parser,
1423            other => panic!("bad parse {:?}", other),
1424        };
1425
1426        // Parse the header of the submodule with the sub-parser.
1427        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
1428        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
1429        assert_matches!(
1430            sub.parse(b"\0asm\x01\0\0\0", false),
1431            Ok(Chunk::Parsed {
1432                consumed: 8,
1433                payload: Payload::Version {
1434                    num: 1,
1435                    encoding: Encoding::Module,
1436                    ..
1437                },
1438            }),
1439        );
1440
1441        // The sub-parser should be byte-limited so the next byte shouldn't get
1442        // consumed, it's intended for the parent parser.
1443        assert_matches!(
1444            sub.parse(&[10], false),
1445            Ok(Chunk::Parsed {
1446                consumed: 0,
1447                payload: Payload::End(18),
1448            }),
1449        );
1450
1451        // The parent parser should now be back to resuming, and we simulate it
1452        // being done with bytes to ensure that it's safely at the end,
1453        // completing the module code section.
1454        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1455        assert_matches!(
1456            p.parse(&[], true),
1457            Ok(Chunk::Parsed {
1458                consumed: 0,
1459                payload: Payload::End(18),
1460            }),
1461        );
1462    }
1463
1464    #[test]
1465    fn nested_section_too_big() {
1466        let mut p = parser_after_component_header();
1467
1468        // A module that's 10 bytes in length
1469        let mut sub = match p.parse(&[1, 10], false) {
1470            Ok(Chunk::Parsed {
1471                consumed: 2,
1472                payload: Payload::ModuleSection { parser, .. },
1473            }) => parser,
1474            other => panic!("bad parse {:?}", other),
1475        };
1476
1477        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
1478        // module.
1479        assert_matches!(
1480            sub.parse(b"\0asm\x01\0\0\0", false),
1481            Ok(Chunk::Parsed {
1482                consumed: 8,
1483                payload: Payload::Version { num: 1, .. },
1484            }),
1485        );
1486
1487        // We can't parse a section which declares its bigger than the outer
1488        // module. This is a custom section, one byte big, with one content byte. The
1489        // content byte, however, lives outside of the parent's module code
1490        // section.
1491        assert_eq!(
1492            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
1493            "section too large",
1494        );
1495    }
1496}
wasmparser/parser.rs

wasmparser/
parser.rs