1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
//! Data structures to provide transformation of the source

use crate::obj::ELF_WASMTIME_ADDRMAP;
use object::write::{Object, StandardSegment};
use object::{Bytes, LittleEndian, SectionKind, U32Bytes};
use serde::{Deserialize, Serialize};
use std::convert::TryFrom;
use std::ops::Range;

/// Single source location to generated address mapping.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct InstructionAddressMap {
    /// Where in the source wasm binary this instruction comes from, specified
    /// in an offset of bytes from the front of the file.
    pub srcloc: FilePos,

    /// Offset from the start of the function's compiled code to where this
    /// instruction is located, or the region where it starts.
    pub code_offset: u32,
}

/// A position within an original source file,
///
/// This structure is used as a newtype wrapper around a 32-bit integer which
/// represents an offset within a file where a wasm instruction or function is
/// to be originally found.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct FilePos(u32);

impl FilePos {
    /// Create a new file position with the given offset.
    pub fn new(pos: u32) -> FilePos {
        assert!(pos != u32::MAX);
        FilePos(pos)
    }

    /// Returns the offset that this offset was created with.
    ///
    /// Note that the `Default` implementation will return `None` here, whereas
    /// positions created with `FilePos::new` will return `Some`.
    pub fn file_offset(self) -> Option<u32> {
        if self.0 == u32::MAX {
            None
        } else {
            Some(self.0)
        }
    }
}

impl Default for FilePos {
    fn default() -> FilePos {
        FilePos(u32::MAX)
    }
}

/// Builder for the address map section of a wasmtime compilation image.
///
/// This builder is used to conveniently built the `ELF_WASMTIME_ADDRMAP`
/// section by compilers, and provides utilities to directly insert the results
/// into an `Object`.
#[derive(Default)]
pub struct AddressMapSection {
    offsets: Vec<U32Bytes<LittleEndian>>,
    positions: Vec<U32Bytes<LittleEndian>>,
    last_offset: u32,
}

impl AddressMapSection {
    /// Pushes a new set of instruction mapping information for a function added
    /// in the exectuable.
    ///
    /// The `func` argument here is the range of the function, relative to the
    /// start of the text section in the executable. The `instrs` provided are
    /// the descriptors for instructions in the function and their various
    /// mappings back to original source positions.
    ///
    /// This is required to be called for `func` values that are strictly
    /// increasing in addresses (e.g. as the object is built). Additionally the
    /// `instrs` map must be sorted based on code offset in the native text
    /// section.
    pub fn push(&mut self, func: Range<u64>, instrs: &[InstructionAddressMap]) {
        // NB: for now this only supports <=4GB text sections in object files.
        // Alternative schemes will need to be created for >32-bit offsets to
        // avoid making this section overly large.
        let func_start = u32::try_from(func.start).unwrap();
        let func_end = u32::try_from(func.end).unwrap();

        self.offsets.reserve(instrs.len());
        self.positions.reserve(instrs.len());
        for map in instrs {
            // Sanity-check to ensure that functions are pushed in-order, otherwise
            // the `offsets` array won't be sorted which is our goal.
            let pos = func_start + map.code_offset;
            assert!(pos >= self.last_offset);
            self.offsets.push(U32Bytes::new(LittleEndian, pos));
            self.positions
                .push(U32Bytes::new(LittleEndian, map.srcloc.0));
            self.last_offset = pos;
        }
        self.last_offset = func_end;
    }

    /// Finishes encoding this section into the `Object` provided.
    pub fn append_to(self, obj: &mut Object) {
        let section = obj.add_section(
            obj.segment_name(StandardSegment::Data).to_vec(),
            ELF_WASMTIME_ADDRMAP.as_bytes().to_vec(),
            SectionKind::ReadOnlyData,
        );

        // NB: this matches the encoding expected by `lookup` below.
        let amt = u32::try_from(self.offsets.len()).unwrap();
        obj.append_section_data(section, &amt.to_le_bytes(), 1);
        obj.append_section_data(section, object::bytes_of_slice(&self.offsets), 1);
        obj.append_section_data(section, object::bytes_of_slice(&self.positions), 1);
    }
}

/// Parse an `ELF_WASMTIME_ADDRMAP` section, returning the slice of code offsets
/// and the slice of associated file positions for each offset.
fn parse_address_map(
    section: &[u8],
) -> Option<(&[U32Bytes<LittleEndian>], &[U32Bytes<LittleEndian>])> {
    let mut section = Bytes(section);
    // NB: this matches the encoding written by `append_to` above.
    let count = section.read::<U32Bytes<LittleEndian>>().ok()?;
    let count = usize::try_from(count.get(LittleEndian)).ok()?;
    let (offsets, section) =
        object::slice_from_bytes::<U32Bytes<LittleEndian>>(section.0, count).ok()?;
    let (positions, section) =
        object::slice_from_bytes::<U32Bytes<LittleEndian>>(section, count).ok()?;
    debug_assert!(section.is_empty());
    Some((offsets, positions))
}

/// Lookup an `offset` within an encoded address map section, returning the
/// original `FilePos` that corresponds to the offset, if found.
///
/// This function takes a `section` as its first argument which must have been
/// created with `AddressMapSection` above. This is intended to be the raw
/// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
///
/// The `offset` provided is a relative offset from the start of the text
/// section of the pc that is being looked up. If `offset` is out of range or
/// doesn't correspond to anything in this file then `None` is returned.
pub fn lookup_file_pos(section: &[u8], offset: usize) -> Option<FilePos> {
    let (offsets, positions) = parse_address_map(section)?;

    // First perform a binary search on the `offsets` array. This is a sorted
    // array of offsets within the text section, which is conveniently what our
    // `offset` also is. Note that we are somewhat unlikely to find a precise
    // match on the element in the array, so we're largely interested in which
    // "bucket" the `offset` falls into.
    let offset = u32::try_from(offset).ok()?;
    let index = match offsets.binary_search_by_key(&offset, |v| v.get(LittleEndian)) {
        // Exact hit!
        Ok(i) => i,

        // This *would* be at the first slot in the array, so no
        // instructions cover `pc`.
        Err(0) => return None,

        // This would be at the `nth` slot, so we're at the `n-1`th slot.
        Err(n) => n - 1,
    };

    // Using the `index` we found of which bucket `offset` corresponds to we can
    // lookup the actual `FilePos` value in the `positions` array.
    let pos = positions.get(index)?;
    Some(FilePos(pos.get(LittleEndian)))
}

/// Iterate over the address map contained in the given address map section.
///
/// This function takes a `section` as its first argument which must have been
/// created with `AddressMapSection` above. This is intended to be the raw
/// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
///
/// The yielded offsets are relative to the start of the text section for this
/// map's code object.
pub fn iterate_address_map<'a>(
    section: &'a [u8],
) -> Option<impl Iterator<Item = (u32, FilePos)> + 'a> {
    let (offsets, positions) = parse_address_map(section)?;

    Some(
        offsets
            .iter()
            .map(|o| o.get(LittleEndian))
            .zip(positions.iter().map(|pos| FilePos(pos.get(LittleEndian)))),
    )
}