wasmparser/parser.rs
1use crate::CoreTypeSectionReader;
2use crate::{
3 limits::MAX_WASM_MODULE_SIZE, BinaryReader, BinaryReaderError, ComponentCanonicalSectionReader,
4 ComponentExportSectionReader, ComponentImportSectionReader, ComponentInstanceSectionReader,
5 ComponentStartFunction, ComponentTypeSectionReader, CustomSectionReader, DataSectionReader,
6 ElementSectionReader, ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader,
7 GlobalSectionReader, ImportSectionReader, InstanceSectionReader, MemorySectionReader, Result,
8 SectionLimited, TableSectionReader, TagSectionReader, TypeSectionReader,
9};
10use std::convert::TryInto;
11use std::fmt;
12use std::iter;
13use std::ops::Range;
14
/// Version number used for the core WebAssembly module encoding.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

// The component encoding version began at `0xa` and is bumped on each
// change to the format; once the component model is stabilized it is
// expected to become 0x1. History so far:
//
// * [????-??-??] 0xa - original version
// * [2022-01-05] 0xb - `export` introduces an alias
// * [2022-02-06] 0xc - `export` has an optional type ascribed to it
/// Version number used for the WebAssembly component encoding.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xc;
24
/// The binary encodings that the parser understands.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// The binary is encoded as a WebAssembly module.
    Module,
    /// The binary is encoded as a WebAssembly component.
    Component,
}
33
34/// An incremental parser of a binary WebAssembly module or component.
35///
36/// This type is intended to be used to incrementally parse a WebAssembly module
37/// or component as bytes become available for the module. This can also be used
38/// to parse modules or components that are already entirely resident within memory.
39///
40/// This primary function for a parser is the [`Parser::parse`] function which
41/// will incrementally consume input. You can also use the [`Parser::parse_all`]
42/// function to parse a module or component that is entirely resident in memory.
43#[derive(Debug, Clone)]
44pub struct Parser {
45 state: State,
46 offset: u64,
47 max_size: u64,
48 encoding: Encoding,
49}
50
/// Internal parsing position of a [`Parser`].
#[derive(Clone, Debug)]
enum State {
    /// The header has not been parsed yet.
    Header,
    /// The next thing to parse is the start of a section.
    SectionStart,
    /// The parser is inside a code section: `remaining` function bodies are
    /// still expected within the `len` bytes left in the section.
    FunctionBody { remaining: u32, len: u32 },
}
57
58/// A successful return payload from [`Parser::parse`].
59///
60/// On success one of two possible values can be returned, either that more data
61/// is needed to continue parsing or a chunk of the input was parsed, indicating
62/// how much of it was parsed.
63#[derive(Debug)]
64pub enum Chunk<'a> {
65 /// This can be returned at any time and indicates that more data is needed
66 /// to proceed with parsing. Zero bytes were consumed from the input to
67 /// [`Parser::parse`]. The `usize` value here is a hint as to how many more
68 /// bytes are needed to continue parsing.
69 NeedMoreData(u64),
70
71 /// A chunk was successfully parsed.
72 Parsed {
73 /// This many bytes of the `data` input to [`Parser::parse`] were
74 /// consumed to produce `payload`.
75 consumed: usize,
76 /// The value that we actually parsed.
77 payload: Payload<'a>,
78 },
79}
80
81/// Values that can be parsed from a WebAssembly module or component.
82///
83/// This enumeration is all possible chunks of pieces that can be parsed by a
84/// [`Parser`] from a binary WebAssembly module or component. Note that for many
85/// sections the entire section is parsed all at once, whereas other functions,
86/// like the code section, are parsed incrementally. This is a distinction where some
87/// sections, like the type section, are required to be fully resident in memory
88/// (fully downloaded) before proceeding. Other sections, like the code section,
89/// can be processed in a streaming fashion where each function is extracted
90/// individually so it can possibly be shipped to another thread while you wait
91/// for more functions to get downloaded.
92///
93/// Note that payloads, when returned, do not indicate that the module or component
94/// is valid. For example when you receive a `Payload::TypeSection` the type
95/// section itself has not yet actually been parsed. The reader returned will be
96/// able to parse it, but you'll have to actually iterate the reader to do the
97/// full parse. Each payload returned is intended to be a *window* into the
98/// original `data` passed to [`Parser::parse`] which can be further processed
99/// if necessary.
100pub enum Payload<'a> {
101 /// Indicates the header of a WebAssembly module or component.
102 Version {
103 /// The version number found in the header.
104 num: u16,
105 /// The encoding format being parsed.
106 encoding: Encoding,
107 /// The range of bytes that were parsed to consume the header of the
108 /// module or component. Note that this range is relative to the start
109 /// of the byte stream.
110 range: Range<usize>,
111 },
112
113 /// A module type section was received and the provided reader can be
114 /// used to parse the contents of the type section.
115 TypeSection(TypeSectionReader<'a>),
116 /// A module import section was received and the provided reader can be
117 /// used to parse the contents of the import section.
118 ImportSection(ImportSectionReader<'a>),
119 /// A module function section was received and the provided reader can be
120 /// used to parse the contents of the function section.
121 FunctionSection(FunctionSectionReader<'a>),
122 /// A module table section was received and the provided reader can be
123 /// used to parse the contents of the table section.
124 TableSection(TableSectionReader<'a>),
125 /// A module memory section was received and the provided reader can be
126 /// used to parse the contents of the memory section.
127 MemorySection(MemorySectionReader<'a>),
128 /// A module tag section was received, and the provided reader can be
129 /// used to parse the contents of the tag section.
130 TagSection(TagSectionReader<'a>),
131 /// A module global section was received and the provided reader can be
132 /// used to parse the contents of the global section.
133 GlobalSection(GlobalSectionReader<'a>),
134 /// A module export section was received, and the provided reader can be
135 /// used to parse the contents of the export section.
136 ExportSection(ExportSectionReader<'a>),
137 /// A module start section was received.
138 StartSection {
139 /// The start function index
140 func: u32,
141 /// The range of bytes that specify the `func` field, specified in
142 /// offsets relative to the start of the byte stream.
143 range: Range<usize>,
144 },
145 /// A module element section was received and the provided reader can be
146 /// used to parse the contents of the element section.
147 ElementSection(ElementSectionReader<'a>),
148 /// A module data count section was received.
149 DataCountSection {
150 /// The number of data segments.
151 count: u32,
152 /// The range of bytes that specify the `count` field, specified in
153 /// offsets relative to the start of the byte stream.
154 range: Range<usize>,
155 },
156 /// A module data section was received and the provided reader can be
157 /// used to parse the contents of the data section.
158 DataSection(DataSectionReader<'a>),
159 /// Indicator of the start of the code section of a WebAssembly module.
160 ///
161 /// This entry is returned whenever the code section starts. The `count`
162 /// field indicates how many entries are in this code section. After
163 /// receiving this start marker you're guaranteed that the next `count`
164 /// items will be either `CodeSectionEntry` or an error will be returned.
165 ///
166 /// This, unlike other sections, is intended to be used for streaming the
167 /// contents of the code section. The code section is not required to be
168 /// fully resident in memory when we parse it. Instead a [`Parser`] is
169 /// capable of parsing piece-by-piece of a code section.
170 CodeSectionStart {
171 /// The number of functions in this section.
172 count: u32,
173 /// The range of bytes that represent this section, specified in
174 /// offsets relative to the start of the byte stream.
175 range: Range<usize>,
176 /// The size, in bytes, of the remaining contents of this section.
177 ///
178 /// This can be used in combination with [`Parser::skip_section`]
179 /// where the caller will know how many bytes to skip before feeding
180 /// bytes into `Parser` again.
181 size: u32,
182 },
183 /// An entry of the code section, a function, was parsed from a WebAssembly
184 /// module.
185 ///
186 /// This entry indicates that a function was successfully received from the
187 /// code section, and the payload here is the window into the original input
188 /// where the function resides. Note that the function itself has not been
189 /// parsed, it's only been outlined. You'll need to process the
190 /// `FunctionBody` provided to test whether it parses and/or is valid.
191 CodeSectionEntry(FunctionBody<'a>),
192
193 /// A core module section was received and the provided parser can be
194 /// used to parse the nested module.
195 ///
196 /// This variant is special in that it returns a sub-`Parser`. Upon
197 /// receiving a `ModuleSection` it is expected that the returned
198 /// `Parser` will be used instead of the parent `Parser` until the parse has
199 /// finished. You'll need to feed data into the `Parser` returned until it
200 /// returns `Payload::End`. After that you'll switch back to the parent
201 /// parser to resume parsing the rest of the current component.
202 ///
203 /// Note that binaries will not be parsed correctly if you feed the data for
204 /// a nested module into the parent [`Parser`].
205 ModuleSection {
206 /// The parser for the nested module.
207 parser: Parser,
208 /// The range of bytes that represent the nested module in the
209 /// original byte stream.
210 range: Range<usize>,
211 },
212 /// A core instance section was received and the provided parser can be
213 /// used to parse the contents of the core instance section.
214 ///
215 /// Currently this section is only parsed in a component.
216 InstanceSection(InstanceSectionReader<'a>),
217 /// A core type section was received and the provided parser can be
218 /// used to parse the contents of the core type section.
219 ///
220 /// Currently this section is only parsed in a component.
221 CoreTypeSection(CoreTypeSectionReader<'a>),
222 /// A component section from a WebAssembly component was received and the
223 /// provided parser can be used to parse the nested component.
224 ///
225 /// This variant is special in that it returns a sub-`Parser`. Upon
226 /// receiving a `ComponentSection` it is expected that the returned
227 /// `Parser` will be used instead of the parent `Parser` until the parse has
228 /// finished. You'll need to feed data into the `Parser` returned until it
229 /// returns `Payload::End`. After that you'll switch back to the parent
230 /// parser to resume parsing the rest of the current component.
231 ///
232 /// Note that binaries will not be parsed correctly if you feed the data for
233 /// a nested component into the parent [`Parser`].
234 ComponentSection {
235 /// The parser for the nested component.
236 parser: Parser,
237 /// The range of bytes that represent the nested component in the
238 /// original byte stream.
239 range: Range<usize>,
240 },
241 /// A component instance section was received and the provided reader can be
242 /// used to parse the contents of the component instance section.
243 ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
244 /// A component alias section was received and the provided reader can be
245 /// used to parse the contents of the component alias section.
246 ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
247 /// A component type section was received and the provided reader can be
248 /// used to parse the contents of the component type section.
249 ComponentTypeSection(ComponentTypeSectionReader<'a>),
250 /// A component canonical section was received and the provided reader can be
251 /// used to parse the contents of the component canonical section.
252 ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
253 /// A component start section was received.
254 ComponentStartSection {
255 /// The start function description.
256 start: ComponentStartFunction,
257 /// The range of bytes that specify the `start` field.
258 range: Range<usize>,
259 },
260 /// A component import section was received and the provided reader can be
261 /// used to parse the contents of the component import section.
262 ComponentImportSection(ComponentImportSectionReader<'a>),
263 /// A component export section was received, and the provided reader can be
264 /// used to parse the contents of the component export section.
265 ComponentExportSection(ComponentExportSectionReader<'a>),
266
267 /// A module or component custom section was received.
268 CustomSection(CustomSectionReader<'a>),
269
270 /// An unknown section was found.
271 ///
272 /// This variant is returned for all unknown sections encountered. This
273 /// likely wants to be interpreted as an error by consumers of the parser,
274 /// but this can also be used to parse sections currently unsupported by
275 /// the parser.
276 UnknownSection {
277 /// The 8-bit identifier for this section.
278 id: u8,
279 /// The contents of this section.
280 contents: &'a [u8],
281 /// The range of bytes, relative to the start of the original data
282 /// stream, that the contents of this section reside in.
283 range: Range<usize>,
284 },
285
286 /// The end of the WebAssembly module or component was reached.
287 ///
288 /// The value is the offset in the input byte stream where the end
289 /// was reached.
290 End(usize),
291}
292
// Section identifiers used when the encoding is a module (the custom section
// id is matched for both encodings).
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// Section identifiers used when the encoding is a component. The numeric
// values overlap with the module ids above, so the encoding decides which
// table applies.
const COMPONENT_MODULE_SECTION: u8 = 1;
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
const COMPONENT_SECTION: u8 = 4;
const COMPONENT_INSTANCE_SECTION: u8 = 5;
const COMPONENT_ALIAS_SECTION: u8 = 6;
const COMPONENT_TYPE_SECTION: u8 = 7;
const COMPONENT_CANONICAL_SECTION: u8 = 8;
const COMPONENT_START_SECTION: u8 = 9;
const COMPONENT_IMPORT_SECTION: u8 = 10;
const COMPONENT_EXPORT_SECTION: u8 = 11;
319
320impl Parser {
321 /// Creates a new parser.
322 ///
323 /// Reports errors and ranges relative to `offset` provided, where `offset`
324 /// is some logical offset within the input stream that we're parsing.
325 pub fn new(offset: u64) -> Parser {
326 Parser {
327 state: State::Header,
328 offset,
329 max_size: u64::MAX,
330 // Assume the encoding is a module until we know otherwise
331 encoding: Encoding::Module,
332 }
333 }
334
335 /// Attempts to parse a chunk of data.
336 ///
337 /// This method will attempt to parse the next incremental portion of a
338 /// WebAssembly binary. Data available for the module or component is
339 /// provided as `data`, and the data can be incomplete if more data has yet
340 /// to arrive. The `eof` flag indicates whether more data will ever be received.
341 ///
342 /// There are two ways parsing can succeed with this method:
343 ///
344 /// * `Chunk::NeedMoreData` - this indicates that there is not enough bytes
345 /// in `data` to parse a payload. The caller needs to wait for more data to
346 /// be available in this situation before calling this method again. It is
347 /// guaranteed that this is only returned if `eof` is `false`.
348 ///
349 /// * `Chunk::Parsed` - this indicates that a chunk of the input was
350 /// successfully parsed. The payload is available in this variant of what
351 /// was parsed, and this also indicates how many bytes of `data` was
352 /// consumed. It's expected that the caller will not provide these bytes
353 /// back to the [`Parser`] again.
354 ///
355 /// Note that all `Chunk` return values are connected, with a lifetime, to
356 /// the input buffer. Each parsed chunk borrows the input buffer and is a
357 /// view into it for successfully parsed chunks.
358 ///
359 /// It is expected that you'll call this method until `Payload::End` is
360 /// reached, at which point you're guaranteed that the parse has completed.
361 /// Note that complete parsing, for the top-level module or component,
362 /// implies that `data` is empty and `eof` is `true`.
363 ///
364 /// # Errors
365 ///
366 /// Parse errors are returned as an `Err`. Errors can happen when the
367 /// structure of the data is unexpected or if sections are too large for
368 /// example. Note that errors are not returned for malformed *contents* of
369 /// sections here. Sections are generally not individually parsed and each
370 /// returned [`Payload`] needs to be iterated over further to detect all
371 /// errors.
372 ///
373 /// # Examples
374 ///
375 /// An example of reading a wasm file from a stream (`std::io::Read`) and
376 /// incrementally parsing it.
377 ///
378 /// ```
379 /// use std::io::Read;
380 /// use anyhow::Result;
381 /// use wasmparser::{Parser, Chunk, Payload::*};
382 ///
383 /// fn parse(mut reader: impl Read) -> Result<()> {
384 /// let mut buf = Vec::new();
385 /// let mut parser = Parser::new(0);
386 /// let mut eof = false;
387 /// let mut stack = Vec::new();
388 ///
389 /// loop {
390 /// let (payload, consumed) = match parser.parse(&buf, eof)? {
391 /// Chunk::NeedMoreData(hint) => {
392 /// assert!(!eof); // otherwise an error would be returned
393 ///
394 /// // Use the hint to preallocate more space, then read
395 /// // some more data into our buffer.
396 /// //
397 /// // Note that the buffer management here is not ideal,
398 /// // but it's compact enough to fit in an example!
399 /// let len = buf.len();
400 /// buf.extend((0..hint).map(|_| 0u8));
401 /// let n = reader.read(&mut buf[len..])?;
402 /// buf.truncate(len + n);
403 /// eof = n == 0;
404 /// continue;
405 /// }
406 ///
407 /// Chunk::Parsed { consumed, payload } => (payload, consumed),
408 /// };
409 ///
410 /// match payload {
411 /// // Sections for WebAssembly modules
412 /// Version { .. } => { /* ... */ }
413 /// TypeSection(_) => { /* ... */ }
414 /// ImportSection(_) => { /* ... */ }
415 /// FunctionSection(_) => { /* ... */ }
416 /// TableSection(_) => { /* ... */ }
417 /// MemorySection(_) => { /* ... */ }
418 /// TagSection(_) => { /* ... */ }
419 /// GlobalSection(_) => { /* ... */ }
420 /// ExportSection(_) => { /* ... */ }
421 /// StartSection { .. } => { /* ... */ }
422 /// ElementSection(_) => { /* ... */ }
423 /// DataCountSection { .. } => { /* ... */ }
424 /// DataSection(_) => { /* ... */ }
425 ///
426 /// // Here we know how many functions we'll be receiving as
427 /// // `CodeSectionEntry`, so we can prepare for that, and
428 /// // afterwards we can parse and handle each function
429 /// // individually.
430 /// CodeSectionStart { .. } => { /* ... */ }
431 /// CodeSectionEntry(body) => {
432 /// // here we can iterate over `body` to parse the function
433 /// // and its locals
434 /// }
435 ///
436 /// // Sections for WebAssembly components
437 /// ModuleSection { .. } => { /* ... */ }
438 /// InstanceSection(_) => { /* ... */ }
439 /// CoreTypeSection(_) => { /* ... */ }
440 /// ComponentSection { .. } => { /* ... */ }
441 /// ComponentInstanceSection(_) => { /* ... */ }
442 /// ComponentAliasSection(_) => { /* ... */ }
443 /// ComponentTypeSection(_) => { /* ... */ }
444 /// ComponentCanonicalSection(_) => { /* ... */ }
445 /// ComponentStartSection { .. } => { /* ... */ }
446 /// ComponentImportSection(_) => { /* ... */ }
447 /// ComponentExportSection(_) => { /* ... */ }
448 ///
449 /// CustomSection(_) => { /* ... */ }
450 ///
451 /// // most likely you'd return an error here
452 /// UnknownSection { id, .. } => { /* ... */ }
453 ///
454 /// // Once we've reached the end of a parser we either resume
455 /// // at the parent parser or we break out of the loop because
456 /// // we're done.
457 /// End(_) => {
458 /// if let Some(parent_parser) = stack.pop() {
459 /// parser = parent_parser;
460 /// } else {
461 /// break;
462 /// }
463 /// }
464 /// }
465 ///
466 /// // once we're done processing the payload we can forget the
467 /// // original.
468 /// buf.drain(..consumed);
469 /// }
470 ///
471 /// Ok(())
472 /// }
473 ///
474 /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
475 /// ```
476 pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
477 let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
478 (&data[..(self.max_size as usize)], true)
479 } else {
480 (data, eof)
481 };
482 // TODO: thread through `offset: u64` to `BinaryReader`, remove
483 // the cast here.
484 let mut reader = BinaryReader::new_with_offset(data, self.offset as usize);
485 match self.parse_reader(&mut reader, eof) {
486 Ok(payload) => {
487 // Be sure to update our offset with how far we got in the
488 // reader
489 self.offset += usize_to_u64(reader.position);
490 self.max_size -= usize_to_u64(reader.position);
491 Ok(Chunk::Parsed {
492 consumed: reader.position,
493 payload,
494 })
495 }
496 Err(e) => {
497 // If we're at EOF then there's no way we can recover from any
498 // error, so continue to propagate it.
499 if eof {
500 return Err(e);
501 }
502
503 // If our error doesn't look like it can be resolved with more
504 // data being pulled down, then propagate it, otherwise switch
505 // the error to "feed me please"
506 match e.inner.needed_hint {
507 Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
508 None => Err(e),
509 }
510 }
511 }
512 }
513
514 fn parse_reader<'a>(
515 &mut self,
516 reader: &mut BinaryReader<'a>,
517 eof: bool,
518 ) -> Result<Payload<'a>> {
519 use Payload::*;
520
521 match self.state {
522 State::Header => {
523 const KIND_MODULE: u16 = 0x00;
524 const KIND_COMPONENT: u16 = 0x01;
525
526 let start = reader.original_position();
527 let header_version = reader.read_header_version()?;
528 self.encoding = match (header_version >> 16) as u16 {
529 KIND_MODULE => Encoding::Module,
530 KIND_COMPONENT => Encoding::Component,
531 _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
532 };
533 let num = header_version as u16;
534 self.state = State::SectionStart;
535 Ok(Version {
536 num,
537 encoding: self.encoding,
538 range: start..reader.original_position(),
539 })
540 }
541 State::SectionStart => {
542 // If we're at eof and there are no bytes in our buffer, then
543 // that means we reached the end of the data since it's
544 // just a bunch of sections concatenated after the header.
545 if eof && reader.bytes_remaining() == 0 {
546 return Ok(Payload::End(reader.original_position()));
547 }
548
549 let id_pos = reader.position;
550 let id = reader.read_u8()?;
551 if id & 0x80 != 0 {
552 return Err(BinaryReaderError::new("malformed section id", id_pos));
553 }
554 let len_pos = reader.original_position();
555 let mut len = reader.read_var_u32()?;
556
557 // Test to make sure that this section actually fits within
558 // `Parser::max_size`. This doesn't matter for top-level modules
559 // but it is required for nested modules/components to correctly ensure
560 // that all sections live entirely within their section of the
561 // file.
562 let section_overflow = self
563 .max_size
564 .checked_sub(usize_to_u64(reader.position))
565 .and_then(|s| s.checked_sub(len.into()))
566 .is_none();
567 if section_overflow {
568 return Err(BinaryReaderError::new("section too large", len_pos));
569 }
570
571 match (self.encoding, id) {
572 // Sections for both modules and components.
573 (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
574
575 // Module sections
576 (Encoding::Module, TYPE_SECTION) => {
577 section(reader, len, TypeSectionReader::new, TypeSection)
578 }
579 (Encoding::Module, IMPORT_SECTION) => {
580 section(reader, len, ImportSectionReader::new, ImportSection)
581 }
582 (Encoding::Module, FUNCTION_SECTION) => {
583 section(reader, len, FunctionSectionReader::new, FunctionSection)
584 }
585 (Encoding::Module, TABLE_SECTION) => {
586 section(reader, len, TableSectionReader::new, TableSection)
587 }
588 (Encoding::Module, MEMORY_SECTION) => {
589 section(reader, len, MemorySectionReader::new, MemorySection)
590 }
591 (Encoding::Module, GLOBAL_SECTION) => {
592 section(reader, len, GlobalSectionReader::new, GlobalSection)
593 }
594 (Encoding::Module, EXPORT_SECTION) => {
595 section(reader, len, ExportSectionReader::new, ExportSection)
596 }
597 (Encoding::Module, START_SECTION) => {
598 let (func, range) = single_item(reader, len, "start")?;
599 Ok(StartSection { func, range })
600 }
601 (Encoding::Module, ELEMENT_SECTION) => {
602 section(reader, len, ElementSectionReader::new, ElementSection)
603 }
604 (Encoding::Module, CODE_SECTION) => {
605 let start = reader.original_position();
606 let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
607 let range = start..reader.original_position() + len as usize;
608 self.state = State::FunctionBody {
609 remaining: count,
610 len,
611 };
612 Ok(CodeSectionStart {
613 count,
614 range,
615 size: len,
616 })
617 }
618 (Encoding::Module, DATA_SECTION) => {
619 section(reader, len, DataSectionReader::new, DataSection)
620 }
621 (Encoding::Module, DATA_COUNT_SECTION) => {
622 let (count, range) = single_item(reader, len, "data count")?;
623 Ok(DataCountSection { count, range })
624 }
625 (Encoding::Module, TAG_SECTION) => {
626 section(reader, len, TagSectionReader::new, TagSection)
627 }
628
629 // Component sections
630 (Encoding::Component, COMPONENT_MODULE_SECTION)
631 | (Encoding::Component, COMPONENT_SECTION) => {
632 if len as usize > MAX_WASM_MODULE_SIZE {
633 bail!(
634 len_pos,
635 "{} section is too large",
636 if id == 1 { "module" } else { "component " }
637 );
638 }
639
640 let range =
641 reader.original_position()..reader.original_position() + len as usize;
642 self.max_size -= u64::from(len);
643 self.offset += u64::from(len);
644 let mut parser = Parser::new(usize_to_u64(reader.original_position()));
645 parser.max_size = len.into();
646
647 Ok(match id {
648 1 => ModuleSection { parser, range },
649 4 => ComponentSection { parser, range },
650 _ => unreachable!(),
651 })
652 }
653 (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
654 section(reader, len, InstanceSectionReader::new, InstanceSection)
655 }
656 (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
657 section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
658 }
659 (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
660 reader,
661 len,
662 ComponentInstanceSectionReader::new,
663 ComponentInstanceSection,
664 ),
665 (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
666 section(reader, len, SectionLimited::new, ComponentAliasSection)
667 }
668 (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
669 reader,
670 len,
671 ComponentTypeSectionReader::new,
672 ComponentTypeSection,
673 ),
674 (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
675 reader,
676 len,
677 ComponentCanonicalSectionReader::new,
678 ComponentCanonicalSection,
679 ),
680 (Encoding::Component, COMPONENT_START_SECTION) => {
681 let (start, range) = single_item(reader, len, "component start")?;
682 Ok(ComponentStartSection { start, range })
683 }
684 (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
685 reader,
686 len,
687 ComponentImportSectionReader::new,
688 ComponentImportSection,
689 ),
690 (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
691 reader,
692 len,
693 ComponentExportSectionReader::new,
694 ComponentExportSection,
695 ),
696 (_, id) => {
697 let offset = reader.original_position();
698 let contents = reader.read_bytes(len as usize)?;
699 let range = offset..offset + len as usize;
700 Ok(UnknownSection {
701 id,
702 contents,
703 range,
704 })
705 }
706 }
707 }
708
709 // Once we hit 0 remaining incrementally parsed items, with 0
710 // remaining bytes in each section, we're done and can switch back
711 // to parsing sections.
712 State::FunctionBody {
713 remaining: 0,
714 len: 0,
715 } => {
716 self.state = State::SectionStart;
717 self.parse_reader(reader, eof)
718 }
719
720 // ... otherwise trailing bytes with no remaining entries in these
721 // sections indicates an error.
722 State::FunctionBody { remaining: 0, len } => {
723 debug_assert!(len > 0);
724 let offset = reader.original_position();
725 Err(BinaryReaderError::new(
726 "trailing bytes at end of section",
727 offset,
728 ))
729 }
730
731 // Functions are relatively easy to parse when we know there's at
732 // least one remaining and at least one byte available to read
733 // things.
734 //
735 // We use the remaining length try to read a u32 size of the
736 // function, and using that size we require the entire function be
737 // resident in memory. This means that we're reading whole chunks of
738 // functions at a time.
739 //
740 // Limiting via `Parser::max_size` (nested parsing) happens above in
741 // `fn parse`, and limiting by our section size happens via
742 // `delimited`. Actual parsing of the function body is delegated to
743 // the caller to iterate over the `FunctionBody` structure.
744 State::FunctionBody { remaining, mut len } => {
745 let body = delimited(reader, &mut len, |r| {
746 let size = r.read_var_u32()?;
747 let offset = r.original_position();
748 Ok(FunctionBody::new(offset, r.read_bytes(size as usize)?))
749 })?;
750 self.state = State::FunctionBody {
751 remaining: remaining - 1,
752 len,
753 };
754 Ok(CodeSectionEntry(body))
755 }
756 }
757 }
758
759 /// Convenience function that can be used to parse a module or component
760 /// that is entirely resident in memory.
761 ///
762 /// This function will parse the `data` provided as a WebAssembly module
763 /// or component.
764 ///
765 /// Note that when this function yields sections that provide parsers,
766 /// no further action is required for those sections as payloads from
767 /// those parsers will be automatically returned.
768 pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {
769 let mut stack = Vec::new();
770 let mut cur = self;
771 let mut done = false;
772 iter::from_fn(move || {
773 if done {
774 return None;
775 }
776 let payload = match cur.parse(data, true) {
777 // Propagate all errors
778 Err(e) => {
779 done = true;
780 return Some(Err(e));
781 }
782
783 // This isn't possible because `eof` is always true.
784 Ok(Chunk::NeedMoreData(_)) => unreachable!(),
785
786 Ok(Chunk::Parsed { payload, consumed }) => {
787 data = &data[consumed..];
788 payload
789 }
790 };
791
792 match &payload {
793 Payload::ModuleSection { parser, .. }
794 | Payload::ComponentSection { parser, .. } => {
795 stack.push(cur.clone());
796 cur = parser.clone();
797 }
798 Payload::End(_) => match stack.pop() {
799 Some(p) => cur = p,
800 None => done = true,
801 },
802
803 _ => {}
804 }
805
806 Some(Ok(payload))
807 })
808 }
809
810 /// Skip parsing the code section entirely.
811 ///
812 /// This function can be used to indicate, after receiving
813 /// `CodeSectionStart`, that the section will not be parsed.
814 ///
815 /// The caller will be responsible for skipping `size` bytes (found in the
816 /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
817 /// after the `size` bytes have been skipped.
818 ///
819 /// # Panics
820 ///
821 /// This function will panic if the parser is not in a state where it's
822 /// parsing the code section.
823 ///
824 /// # Examples
825 ///
826 /// ```
827 /// use wasmparser::{Result, Parser, Chunk, Payload::*};
828 /// use std::ops::Range;
829 ///
830 /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
831 /// let mut parser = Parser::new(0);
832 /// loop {
833 /// let payload = match parser.parse(wasm, true)? {
834 /// Chunk::Parsed { consumed, payload } => {
835 /// wasm = &wasm[consumed..];
836 /// payload
837 /// }
838 /// // this state isn't possible with `eof = true`
839 /// Chunk::NeedMoreData(_) => unreachable!(),
840 /// };
841 /// match payload {
842 /// TypeSection(s) => print_range("type section", &s.range()),
843 /// ImportSection(s) => print_range("import section", &s.range()),
844 /// // .. other sections
845 ///
846 /// // Print the range of the code section we see, but don't
847 /// // actually iterate over each individual function.
848 /// CodeSectionStart { range, size, .. } => {
849 /// print_range("code section", &range);
850 /// parser.skip_section();
851 /// wasm = &wasm[size as usize..];
852 /// }
853 /// End(_) => break,
854 /// _ => {}
855 /// }
856 /// }
857 /// Ok(())
858 /// }
859 ///
860 /// fn print_range(section: &str, range: &Range<usize>) {
861 /// println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
862 /// }
863 /// ```
864 pub fn skip_section(&mut self) {
865 let skip = match self.state {
866 State::FunctionBody { remaining: _, len } => len,
867 _ => panic!("wrong state to call `skip_section`"),
868 };
869 self.offset += u64::from(skip);
870 self.max_size -= u64::from(skip);
871 self.state = State::SectionStart;
872 }
873}
874
/// Converts a `usize` into a `u64`, panicking if the value does not fit.
fn usize_to_u64(a: usize) -> u64 {
    let widened: std::result::Result<u64, _> = a.try_into();
    widened.unwrap()
}
878
879/// Parses an entire section resident in memory into a `Payload`.
880///
881/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
882/// to construct the section to return.
883fn section<'a, T>(
884 reader: &mut BinaryReader<'a>,
885 len: u32,
886 ctor: fn(&'a [u8], usize) -> Result<T>,
887 variant: fn(T) -> Payload<'a>,
888) -> Result<Payload<'a>> {
889 let offset = reader.original_position();
890 let payload = reader.read_bytes(len as usize)?;
891 // clear the hint for "need this many more bytes" here because we already
892 // read all the bytes, so it's not possible to read more bytes if this
893 // fails.
894 let reader = ctor(payload, offset).map_err(clear_hint)?;
895 Ok(variant(reader))
896}
897
898/// Reads a section that is represented by a single uleb-encoded `u32`.
899fn single_item<'a, T>(
900 reader: &mut BinaryReader<'a>,
901 len: u32,
902 desc: &str,
903) -> Result<(T, Range<usize>)>
904where
905 T: FromReader<'a>,
906{
907 let range = reader.original_position()..reader.original_position() + len as usize;
908 let mut content = BinaryReader::new_with_offset(reader.read_bytes(len as usize)?, range.start);
909 // We can't recover from "unexpected eof" here because our entire section is
910 // already resident in memory, so clear the hint for how many more bytes are
911 // expected.
912 let ret = content.read().map_err(clear_hint)?;
913 if !content.eof() {
914 bail!(
915 content.original_position(),
916 "unexpected content in the {desc} section",
917 );
918 }
919 Ok((ret, range))
920}
921
922/// Attempts to parse using `f`.
923///
924/// This will update `*len` with the number of bytes consumed, and it will cause
925/// a failure to be returned instead of the number of bytes consumed exceeds
926/// what `*len` currently is.
927fn delimited<'a, T>(
928 reader: &mut BinaryReader<'a>,
929 len: &mut u32,
930 f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
931) -> Result<T> {
932 let start = reader.position;
933 let ret = f(reader)?;
934 *len = match (reader.position - start)
935 .try_into()
936 .ok()
937 .and_then(|i| len.checked_sub(i))
938 {
939 Some(i) => i,
940 None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
941 };
942 Ok(ret)
943}
944
945impl Default for Parser {
946 fn default() -> Parser {
947 Parser::new(0)
948 }
949}
950
951impl Payload<'_> {
952 /// If this `Payload` represents a section in the original wasm module then
953 /// the section's id and range within the original wasm binary are returned.
954 ///
955 /// Not all payloads refer to entire sections, such as the `Version` and
956 /// `CodeSectionEntry` variants. These variants will return `None` from this
957 /// function.
958 ///
959 /// Otherwise this function will return `Some` where the first element is
960 /// the byte identifier for the section and the second element is the range
961 /// of the contents of the section within the original wasm binary.
962 ///
963 /// The purpose of this method is to enable tools to easily iterate over
964 /// entire sections if necessary and handle sections uniformly, for example
965 /// dropping custom sections while preserving all other sections.
966 pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
967 use Payload::*;
968
969 match self {
970 Version { .. } => None,
971 TypeSection(s) => Some((TYPE_SECTION, s.range())),
972 ImportSection(s) => Some((IMPORT_SECTION, s.range())),
973 FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
974 TableSection(s) => Some((TABLE_SECTION, s.range())),
975 MemorySection(s) => Some((MEMORY_SECTION, s.range())),
976 TagSection(s) => Some((TAG_SECTION, s.range())),
977 GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
978 ExportSection(s) => Some((EXPORT_SECTION, s.range())),
979 ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
980 DataSection(s) => Some((DATA_SECTION, s.range())),
981 StartSection { range, .. } => Some((START_SECTION, range.clone())),
982 DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
983 CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
984 CodeSectionEntry(_) => None,
985
986 ModuleSection { range, .. } => Some((COMPONENT_MODULE_SECTION, range.clone())),
987 InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
988 CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
989 ComponentSection { range, .. } => Some((COMPONENT_SECTION, range.clone())),
990 ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
991 ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
992 ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
993 ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
994 ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
995 ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
996 ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
997
998 CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
999
1000 UnknownSection { id, range, .. } => Some((*id, range.clone())),
1001
1002 End(_) => None,
1003 }
1004 }
1005}
1006
1007impl fmt::Debug for Payload<'_> {
1008 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1009 use Payload::*;
1010 match self {
1011 Version {
1012 num,
1013 encoding,
1014 range,
1015 } => f
1016 .debug_struct("Version")
1017 .field("num", num)
1018 .field("encoding", encoding)
1019 .field("range", range)
1020 .finish(),
1021
1022 // Module sections
1023 TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1024 ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1025 FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1026 TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1027 MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1028 TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1029 GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1030 ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1031 ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1032 DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1033 StartSection { func, range } => f
1034 .debug_struct("StartSection")
1035 .field("func", func)
1036 .field("range", range)
1037 .finish(),
1038 DataCountSection { count, range } => f
1039 .debug_struct("DataCountSection")
1040 .field("count", count)
1041 .field("range", range)
1042 .finish(),
1043 CodeSectionStart { count, range, size } => f
1044 .debug_struct("CodeSectionStart")
1045 .field("count", count)
1046 .field("range", range)
1047 .field("size", size)
1048 .finish(),
1049 CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1050
1051 // Component sections
1052 ModuleSection { parser: _, range } => f
1053 .debug_struct("ModuleSection")
1054 .field("range", range)
1055 .finish(),
1056 InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1057 CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1058 ComponentSection { parser: _, range } => f
1059 .debug_struct("ComponentSection")
1060 .field("range", range)
1061 .finish(),
1062 ComponentInstanceSection(_) => f
1063 .debug_tuple("ComponentInstanceSection")
1064 .field(&"...")
1065 .finish(),
1066 ComponentAliasSection(_) => f
1067 .debug_tuple("ComponentAliasSection")
1068 .field(&"...")
1069 .finish(),
1070 ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1071 ComponentCanonicalSection(_) => f
1072 .debug_tuple("ComponentCanonicalSection")
1073 .field(&"...")
1074 .finish(),
1075 ComponentStartSection { .. } => f
1076 .debug_tuple("ComponentStartSection")
1077 .field(&"...")
1078 .finish(),
1079 ComponentImportSection(_) => f
1080 .debug_tuple("ComponentImportSection")
1081 .field(&"...")
1082 .finish(),
1083 ComponentExportSection(_) => f
1084 .debug_tuple("ComponentExportSection")
1085 .field(&"...")
1086 .finish(),
1087
1088 CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1089
1090 UnknownSection { id, range, .. } => f
1091 .debug_struct("UnknownSection")
1092 .field("id", id)
1093 .field("range", range)
1094 .finish(),
1095
1096 End(offset) => f.debug_tuple("End").field(offset).finish(),
1097 }
1098 }
1099}
1100
1101fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1102 err.inner.needed_hint = None;
1103 err
1104}
1105
#[cfg(test)]
mod tests {
    use super::*;

    // Asserts that the expression `$a` matches the pattern `$b`. On a mismatch
    // it panics with the `Debug` rendering of the value and the stringified
    // pattern.
    macro_rules! assert_matches {
        ($a:expr, $b:pat $(,)?) => {
            match $a {
                $b => {}
                a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
            }
        };
    }

    // The 8-byte header: with `eof = false` the parser reports how many more
    // bytes it needs until the whole header is available; with `eof = true`
    // an empty input is an error.
    #[test]
    fn header() {
        assert!(Parser::default().parse(&[], true).is_err());
        assert_matches!(
            Parser::default().parse(&[], false),
            Ok(Chunk::NeedMoreData(4)),
        );
        assert_matches!(
            Parser::default().parse(b"\0", false),
            Ok(Chunk::NeedMoreData(3)),
        );
        assert_matches!(
            Parser::default().parse(b"\0asm", false),
            Ok(Chunk::NeedMoreData(4)),
        );
        assert_matches!(
            Parser::default().parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );
    }

    // `parse_all` must terminate (and not panic) on truncated or invalid
    // headers; the yielded items themselves are not inspected.
    #[test]
    fn header_iter() {
        for _ in Parser::default().parse_all(&[]) {}
        for _ in Parser::default().parse_all(b"\0") {}
        for _ in Parser::default().parse_all(b"\0asm") {}
        for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
    }

    // Returns a parser that has already consumed a valid core-module header.
    fn parser_after_header() -> Parser {
        let mut p = Parser::default();
        assert_matches!(
            p.parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: WASM_MODULE_VERSION,
                    encoding: Encoding::Module,
                    ..
                },
            }),
        );
        p
    }

    // Returns a parser that has already consumed a valid component header.
    fn parser_after_component_header() -> Parser {
        let mut p = Parser::default();
        assert_matches!(
            p.parse(b"\0asm\x0c\0\x01\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: WASM_COMPONENT_VERSION,
                    encoding: Encoding::Component,
                    ..
                },
            }),
        );
        p
    }

    // Section id 8 yields `StartSection`. Covers truncated input with and
    // without `eof`, a well-formed section, and sections whose declared size
    // does not match their single item.
    #[test]
    fn start_section() {
        assert_matches!(
            parser_after_header().parse(&[], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        // Truncated sections are errors once `eof` is set...
        assert!(parser_after_header().parse(&[8], true).is_err());
        assert!(parser_after_header().parse(&[8, 1], true).is_err());
        assert!(parser_after_header().parse(&[8, 2], true).is_err());
        // ...but merely request more data otherwise.
        assert_matches!(
            parser_after_header().parse(&[8], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 2], false),
            Ok(Chunk::NeedMoreData(2)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::StartSection { func: 1, .. },
            }),
        );
        // Declared size of 2 leaves a byte over after the item; size 0 leaves
        // nothing to read the item from.
        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
        assert!(parser_after_header().parse(&[8, 0], false).is_err());
    }

    // With no more input and `eof = true` the parser yields `End` carrying the
    // current offset (8, right after the header) without consuming anything.
    #[test]
    fn end_works() {
        assert_matches!(
            parser_after_header().parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(8),
            }),
        );
    }

    // Section id 1 yields `TypeSection`.
    #[test]
    fn type_section() {
        assert!(parser_after_header().parse(&[1], true).is_err());
        assert!(parser_after_header().parse(&[1, 0], false).is_err());
        // NOTE(review): id 8 here matches the assertion in `start_section`;
        // confirm whether `&[1, 2]` was intended for this test.
        assert!(parser_after_header().parse(&[8, 2], true).is_err());
        assert_matches!(
            parser_after_header().parse(&[1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[1, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[1, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
        // Bytes following the section are left unconsumed.
        assert_matches!(
            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
    }

    // Section id 0 yields `CustomSection`; checks the name, data, data offset
    // and range reported for a few small sections.
    #[test]
    fn custom_section() {
        assert!(parser_after_header().parse(&[0], true).is_err());
        assert!(parser_after_header().parse(&[0, 0], false).is_err());
        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
        assert_matches!(
            parser_after_header().parse(&[0, 2, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        // Empty name, no data.
        assert_matches!(
            parser_after_header().parse(&[0, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CustomSection(CustomSectionReader {
                    name: "",
                    data_offset: 11,
                    data: b"",
                    range: Range { start: 10, end: 11 },
                }),
            }),
        );
        // One-byte name, no data.
        assert_matches!(
            parser_after_header().parse(&[0, 2, 1, b'a'], false),
            Ok(Chunk::Parsed {
                consumed: 4,
                payload: Payload::CustomSection(CustomSectionReader {
                    name: "a",
                    data_offset: 12,
                    data: b"",
                    range: Range { start: 10, end: 12 },
                }),
            }),
        );
        // Empty name, one byte of data.
        assert_matches!(
            parser_after_header().parse(&[0, 2, 0, b'a'], false),
            Ok(Chunk::Parsed {
                consumed: 4,
                payload: Payload::CustomSection(CustomSectionReader {
                    name: "",
                    data_offset: 11,
                    data: b"a",
                    range: Range { start: 10, end: 12 },
                }),
            }),
        );
    }

    // NOTE(review): despite its name, this test feeds section id 10, which
    // yields `CodeSectionStart` / `CodeSectionEntry` payloads.
    #[test]
    fn function_section() {
        assert!(parser_after_header().parse(&[10], true).is_err());
        assert!(parser_after_header().parse(&[10, 0], true).is_err());
        assert!(parser_after_header().parse(&[10, 1], true).is_err());
        assert_matches!(
            parser_after_header().parse(&[10], false),
            Ok(Chunk::NeedMoreData(1))
        );
        assert_matches!(
            parser_after_header().parse(&[10, 1], false),
            Ok(Chunk::NeedMoreData(1))
        );
        // Section with zero entries, followed by the end of the module.
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 0, .. },
            }),
        );
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(11),
            }),
        );
        // Section with one entry, fed to the parser separately from the
        // section start.
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 2, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(12),
            }),
        );

        // 1 byte section with 1 function can't read the function body because
        // the section is too small
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file"
        );

        // section with 2 functions but section is cut off
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 2, 2], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 2, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file",
        );

        // trailing data is bad
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 3, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "trailing bytes at end of section",
        );
    }

    // A component containing one nested module: the `ModuleSection` payload
    // hands out a sub-parser that is limited to the bytes of the nested module.
    #[test]
    fn single_module() {
        let mut p = parser_after_component_header();
        // A lone section id byte isn't enough to start a section.
        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));

        // A module that's 8 bytes in length
        let mut sub = match p.parse(&[1, 8], false) {
            Ok(Chunk::Parsed {
                consumed: 2,
                payload: Payload::ModuleSection { parser, .. },
            }) => parser,
            other => panic!("bad parse {:?}", other),
        };

        // Parse the header of the submodule with the sub-parser.
        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(
            sub.parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: 1,
                    encoding: Encoding::Module,
                    ..
                },
            }),
        );

        // The sub-parser should be byte-limited so the next byte shouldn't get
        // consumed, it's intended for the parent parser.
        assert_matches!(
            sub.parse(&[10], false),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );

        // The parent parser should now be back to resuming, and we simulate it
        // being done with bytes to ensure that it's safely at the end,
        // completing the module code section.
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );
    }

    // A section inside a nested module may not extend past the end of that
    // nested module.
    #[test]
    fn nested_section_too_big() {
        let mut p = parser_after_component_header();

        // A module that's 10 bytes in length
        let mut sub = match p.parse(&[1, 10], false) {
            Ok(Chunk::Parsed {
                consumed: 2,
                payload: Payload::ModuleSection { parser, .. },
            }) => parser,
            other => panic!("bad parse {:?}", other),
        };

        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
        // module.
        assert_matches!(
            sub.parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );

        // We can't parse a section which declares it's bigger than the outer
        // module. This is a custom section, one byte big, with one content byte. The
        // content byte, however, lives outside of the parent's module code
        // section.
        assert_eq!(
            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
            "section too large",
        );
    }
}