bitcoin_internals/hex/
buf_encoder.rs

1// SPDX-License-Identifier: CC0-1.0
2
3//! Implements a buffered encoder.
4//!
//! The main type of this module is [`BufEncoder`] which provides buffered hex encoding. This is
//! faster than the usual `write!(f, "{:02x}", b)?` in a for loop because it reduces dynamic
//! dispatch and decreases the number of allocations if a `String` is being created.
8
9use core::borrow::Borrow;
10
11pub use out_bytes::OutBytes;
12
13use super::Case;
14
15/// Trait for types that can be soundly converted to `OutBytes`.
16///
17/// To protect the API from future breakage this sealed trait guards which types can be used with
18/// the `Encoder`. Currently it is implemented for byte arrays of various interesting lengths.
19///
20/// ## Safety
21///
22/// This is not `unsafe` yet but the `as_out_bytes` should always return the same reference if the
23/// same reference is supplied. IOW the returned memory address and length should be the same if
24/// the input memory address and length are the same.
25///
26/// If the trait ever becomes `unsafe` this will be required for soundness.
27pub trait AsOutBytes: out_bytes::Sealed {
28    /// Performs the conversion.
29    fn as_out_bytes(&self) -> &OutBytes;
30
31    /// Performs the conversion.
32    fn as_mut_out_bytes(&mut self) -> &mut OutBytes;
33}
34
35/// A buffer with compile-time-known length.
36///
37/// This is essentially `Default + AsOutBytes` but supports lengths 1.41 doesn't.
38pub trait FixedLenBuf: Sized + AsOutBytes {
39    /// Creates an uninitialized buffer.
40    ///
41    /// The current implementtions initialize the buffer with zeroes but it should be treated a
42    /// uninitialized anyway.
43    fn uninit() -> Self;
44}
45
46/// Implements `OutBytes`
47///
48/// This prevents the rest of the crate from accessing the field of `OutBytes`.
49mod out_bytes {
50    use super::AsOutBytes;
51
52    /// A byte buffer that can only be written-into.
53    ///
54    /// You shouldn't concern yourself with this, just call `BufEncoder::new` with your array.
55    ///
56    /// This prepares the API for potential future support of `[MaybeUninit<u8>]`. We don't want to use
57    /// `unsafe` until it's proven to be needed but if it does we have an easy, compatible upgrade
58    /// option.
59    ///
60    /// Warning: `repr(transparent)` is an internal implementation detail and **must not** be
61    /// relied on!
62    #[repr(transparent)]
63    pub struct OutBytes([u8]);
64
65    impl OutBytes {
66        /// Returns the first `len` bytes as initialized.
67        ///
68        /// Not `unsafe` because we don't use `unsafe` (yet).
69        ///
70        /// ## Panics
71        ///
72        /// The method panics if `len` is out of bounds.
73        #[track_caller]
74        pub(crate) fn assume_init(&self, len: usize) -> &[u8] { &self.0[..len] }
75
76        /// Writes given bytes into the buffer.
77        ///
78        /// ## Panics
79        ///
80        /// The method panics if pos is out of bounds or `bytes` don't fit into the buffer.
81        #[track_caller]
82        pub(crate) fn write(&mut self, pos: usize, bytes: &[u8]) {
83            self.0[pos..(pos + bytes.len())].copy_from_slice(bytes);
84        }
85
86        /// Returns the length of the buffer.
87        pub(crate) fn len(&self) -> usize { self.0.len() }
88
89        fn from_bytes(slice: &[u8]) -> &Self {
90            // SAFETY: copied from std
91            // conversion of reference to pointer of the same referred type is always sound,
92            // including in unsized types.
93            // Thanks to repr(transparent) the types have the same layout making the other
94            // conversion sound.
95            // The pointer was just created from a reference that's still alive so dereferencing is
96            // sound.
97            unsafe { &*(slice as *const [u8] as *const Self) }
98        }
99
100        fn from_mut_bytes(slice: &mut [u8]) -> &mut Self {
101            // SAFETY: copied from std
102            // conversion of reference to pointer of the same referred type is always sound,
103            // including in unsized types.
104            // Thanks to repr(transparent) the types have the same layout making the other
105            // conversion sound.
106            // The pointer was just created from a reference that's still alive so dereferencing is
107            // sound.
108            unsafe { &mut *(slice as *mut [u8] as *mut Self) }
109        }
110    }
111
112    macro_rules! impl_from_array {
113        ($($len:expr),* $(,)?) => {
114            $(
115                impl super::FixedLenBuf for [u8; $len] {
116                    fn uninit() -> Self {
117                        [0u8; $len]
118                    }
119                }
120
121                impl AsOutBytes for [u8; $len] {
122                    fn as_out_bytes(&self) -> &OutBytes {
123                        OutBytes::from_bytes(self)
124                    }
125
126                    fn as_mut_out_bytes(&mut self) -> &mut OutBytes {
127                        OutBytes::from_mut_bytes(self)
128                    }
129                }
130
131                impl Sealed for [u8; $len] {}
132
133                impl<'a> super::super::display::DisplayHex for &'a [u8; $len / 2] {
134                    type Display = super::super::display::DisplayArray<core::slice::Iter<'a, u8>, [u8; $len]>;
135                    fn as_hex(self) -> Self::Display {
136                        super::super::display::DisplayArray::new(self.iter())
137                    }
138
139                    fn hex_reserve_suggestion(self) -> usize {
140                        $len
141                    }
142                }
143            )*
144        }
145    }
146
147    impl<T: AsOutBytes + ?Sized> AsOutBytes for &'_ mut T {
148        fn as_out_bytes(&self) -> &OutBytes { (**self).as_out_bytes() }
149
150        fn as_mut_out_bytes(&mut self) -> &mut OutBytes { (**self).as_mut_out_bytes() }
151    }
152
153    impl<T: AsOutBytes + ?Sized> Sealed for &'_ mut T {}
154
155    impl AsOutBytes for OutBytes {
156        fn as_out_bytes(&self) -> &OutBytes { self }
157
158        fn as_mut_out_bytes(&mut self) -> &mut OutBytes { self }
159    }
160
161    impl Sealed for OutBytes {}
162
163    // As a sanity check we only provide conversions for even, non-empty arrays.
164    // Weird lengths 66 and 130 are provided for serialized public keys.
165    impl_from_array!(
166        2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 40, 64, 66, 128, 130, 256, 512,
167        1024, 2048, 4096, 8192
168    );
169
170    /// Prevents outside crates from implementing the trait
171    pub trait Sealed {}
172}
173
174/// Hex-encodes bytes into the provided buffer.
175///
176/// This is an important building block for fast hex-encoding. Because string writing tools
177/// provided by `core::fmt` involve dynamic dispatch and don't allow reserving capacity in strings
178/// buffering the hex and then formatting it is significantly faster.
179pub struct BufEncoder<T: AsOutBytes> {
180    buf: T,
181    pos: usize,
182}
183
184impl<T: AsOutBytes> BufEncoder<T> {
185    /// Creates an empty `BufEncoder`.
186    ///
187    /// This is usually used with uninitialized (zeroed) byte array allocated on stack.
188    /// This can only be constructed with an even-length, non-empty array.
189    #[inline]
190    pub fn new(buf: T) -> Self { BufEncoder { buf, pos: 0 } }
191
192    /// Encodes `byte` as hex in given `case` and appends it to the buffer.
193    ///
194    /// ## Panics
195    ///
196    /// The method panics if the buffer is full.
197    #[inline]
198    #[track_caller]
199    pub fn put_byte(&mut self, byte: u8, case: Case) {
200        self.buf.as_mut_out_bytes().write(self.pos, &super::byte_to_hex(byte, case.table()));
201        self.pos += 2;
202    }
203
204    /// Encodes `bytes` as hex in given `case` and appends them to the buffer.
205    ///
206    /// ## Panics
207    ///
208    /// The method panics if the bytes wouldn't fit the buffer.
209    #[inline]
210    #[track_caller]
211    pub fn put_bytes<I>(&mut self, bytes: I, case: Case)
212    where
213        I: IntoIterator,
214        I::Item: Borrow<u8>,
215    {
216        self.put_bytes_inner(bytes.into_iter(), case)
217    }
218
219    #[inline]
220    #[track_caller]
221    fn put_bytes_inner<I>(&mut self, bytes: I, case: Case)
222    where
223        I: Iterator,
224        I::Item: Borrow<u8>,
225    {
226        // May give the compiler better optimization opportunity
227        if let Some(max) = bytes.size_hint().1 {
228            assert!(max <= self.space_remaining());
229        }
230        for byte in bytes {
231            self.put_byte(*byte.borrow(), case);
232        }
233    }
234
235    /// Encodes as many `bytes` as fit into the buffer as hex and return the remainder.
236    ///
237    /// This method works just like `put_bytes` but instead of panicking it returns the unwritten
238    /// bytes. The method returns an empty slice if all bytes were written
239    #[must_use = "this may write only part of the input buffer"]
240    #[inline]
241    #[track_caller]
242    pub fn put_bytes_min<'a>(&mut self, bytes: &'a [u8], case: Case) -> &'a [u8] {
243        let to_write = self.space_remaining().min(bytes.len());
244        self.put_bytes(&bytes[..to_write], case);
245        &bytes[to_write..]
246    }
247
248    /// Returns true if no more bytes can be written into the buffer.
249    #[inline]
250    pub fn is_full(&self) -> bool { self.pos == self.buf.as_out_bytes().len() }
251
252    /// Returns the written bytes as a hex `str`.
253    #[inline]
254    pub fn as_str(&self) -> &str {
255        core::str::from_utf8(self.buf.as_out_bytes().assume_init(self.pos))
256            .expect("we only write ASCII")
257    }
258
259    /// Resets the buffer to become empty.
260    #[inline]
261    pub fn clear(&mut self) { self.pos = 0; }
262
263    /// How many bytes can be written to this buffer.
264    ///
265    /// Note that this returns the number of bytes before encoding, not number of hex digits.
266    #[inline]
267    pub fn space_remaining(&self) -> usize { (self.buf.as_out_bytes().len() - self.pos) / 2 }
268}
269
#[cfg(test)]
mod tests {
    use super::*;

    // A fresh encoder holds no text and is not full.
    #[test]
    fn empty() {
        let mut storage = [0u8; 2];
        let enc = BufEncoder::new(&mut storage);
        assert_eq!(enc.as_str(), "");
        assert!(!enc.is_full());
    }

    // One byte exactly fills a two-digit buffer, in either case, also after `clear`.
    #[test]
    fn single_byte_exact_buf() {
        let mut storage = [0u8; 2];
        let mut enc = BufEncoder::new(&mut storage);
        assert_eq!(enc.space_remaining(), 1);

        enc.put_byte(0x2a, Case::Lower);
        assert_eq!(enc.as_str(), "2a");
        assert_eq!(enc.space_remaining(), 0);
        assert!(enc.is_full());

        enc.clear();
        assert_eq!(enc.space_remaining(), 1);
        assert!(!enc.is_full());

        enc.put_byte(0x2a, Case::Upper);
        assert_eq!(enc.as_str(), "2A");
        assert_eq!(enc.space_remaining(), 0);
        assert!(enc.is_full());
    }

    // One byte in a four-digit buffer leaves room for one more.
    #[test]
    fn single_byte_oversized_buf() {
        let mut storage = [0u8; 4];
        let mut enc = BufEncoder::new(&mut storage);
        assert_eq!(enc.space_remaining(), 2);

        enc.put_byte(0x2a, Case::Lower);
        assert_eq!(enc.space_remaining(), 1);
        assert_eq!(enc.as_str(), "2a");
        assert!(!enc.is_full());

        enc.clear();
        assert_eq!(enc.space_remaining(), 2);

        enc.put_byte(0x2a, Case::Upper);
        assert_eq!(enc.as_str(), "2A");
        assert_eq!(enc.space_remaining(), 1);
        assert!(!enc.is_full());
    }

    // Two bytes fill a four-digit buffer.
    #[test]
    fn two_bytes() {
        let mut storage = [0u8; 4];
        let mut enc = BufEncoder::new(&mut storage);

        enc.put_byte(0x2a, Case::Lower);
        assert_eq!(enc.space_remaining(), 1);
        enc.put_byte(0xff, Case::Lower);
        assert_eq!(enc.space_remaining(), 0);
        assert_eq!(enc.as_str(), "2aff");
        assert!(enc.is_full());

        enc.clear();
        assert!(!enc.is_full());

        enc.put_byte(0x2a, Case::Upper);
        enc.put_byte(0xff, Case::Upper);
        assert_eq!(enc.as_str(), "2AFF");
        assert!(enc.is_full());
    }

    // `put_bytes_min` writes what fits and hands back the rest.
    #[test]
    fn put_bytes_min() {
        let mut storage = [0u8; 2];
        let mut enc = BufEncoder::new(&mut storage);

        let rest = enc.put_bytes_min(b"", Case::Lower);
        assert_eq!(rest, b"");
        assert_eq!(enc.as_str(), "");

        let rest = enc.put_bytes_min(b"*", Case::Lower);
        assert_eq!(rest, b"");
        assert_eq!(enc.as_str(), "2a");

        enc.clear();
        let rest = enc.put_bytes_min(&[42, 255], Case::Lower);
        assert_eq!(rest, &[255]);
        assert_eq!(enc.as_str(), "2a");
    }

    // The encoder output must match what `core::fmt` produces for every byte value.
    #[test]
    fn same_as_fmt() {
        use core::fmt::{self, Write};

        // Minimal two-byte `fmt::Write` sink used as the reference implementation.
        struct Writer {
            buf: [u8; 2],
            pos: usize,
        }

        impl Writer {
            fn as_str(&self) -> &str {
                let written = &self.buf[..self.pos];
                core::str::from_utf8(written).unwrap()
            }
        }

        impl Write for Writer {
            fn write_str(&mut self, s: &str) -> fmt::Result {
                assert!(self.pos <= 2);
                if s.len() > 2 - self.pos {
                    return Err(fmt::Error);
                }
                let end = self.pos + s.len();
                self.buf[self.pos..end].copy_from_slice(s.as_bytes());
                self.pos = end;
                Ok(())
            }
        }

        let mut reference = Writer { buf: [0u8; 2], pos: 0 };
        let mut storage = [0u8; 2];
        let mut enc = BufEncoder::new(&mut storage);

        for byte in 0..=255u8 {
            write!(reference, "{:02x}", byte).unwrap();
            enc.put_byte(byte, Case::Lower);
            assert_eq!(enc.as_str(), reference.as_str());
            reference.pos = 0;
            enc.clear();
        }

        for byte in 0..=255u8 {
            write!(reference, "{:02X}", byte).unwrap();
            enc.put_byte(byte, Case::Upper);
            assert_eq!(enc.as_str(), reference.as_str());
            reference.pos = 0;
            enc.clear();
        }
    }
}