1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
use crate::bytes;
use crate::compress::{max_compress_len, Encoder};
use crate::crc32::CheckSummer;
use crate::error::Error;
use crate::MAX_BLOCK_SIZE;

/// The maximum chunk of compressed bytes that can be processed at one time.
///
/// This is computed via `max_compress_len(MAX_BLOCK_SIZE)`.
///
/// TODO(ag): Replace with const fn once they support nominal branching.
pub const MAX_COMPRESS_BLOCK_SIZE: usize = 76490;

/// The special magic string that starts any stream.
///
/// This may appear more than once in a stream in order to support easy
/// concatenation of files compressed in the Snappy frame format.
pub const STREAM_IDENTIFIER: &'static [u8] = b"\xFF\x06\x00\x00sNaPpY";

/// The body of the special stream identifier.
pub const STREAM_BODY: &'static [u8] = b"sNaPpY";

/// The length of a snappy chunk type (1 byte), packet length (3 bytes)
/// and CRC field (4 bytes). This is technically the chunk header _plus_
/// the CRC present in most chunks.
pub const CHUNK_HEADER_AND_CRC_SIZE: usize = 8;

/// An enumeration describing each of the 4 main chunk types.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ChunkType {
    Stream = 0xFF,
    Compressed = 0x00,
    Uncompressed = 0x01,
    Padding = 0xFE,
}

impl ChunkType {
    /// Converts a byte to one of the four defined chunk types represented by
    /// a single byte. If the chunk type is reserved, then it is returned as
    /// an Err.
    pub fn from_u8(b: u8) -> Result<ChunkType, u8> {
        match b {
            0xFF => Ok(ChunkType::Stream),
            0x00 => Ok(ChunkType::Compressed),
            0x01 => Ok(ChunkType::Uncompressed),
            0xFE => Ok(ChunkType::Padding),
            b => Err(b),
        }
    }
}

/// Compress a single frame (or decide to pass it through uncompressed). This
/// will output a frame header in `dst_chunk_header`, and it will return a slice
/// pointing to the data to use in the frame. The `dst_chunk_header` array must
/// always have a size of 8 bytes.
///
/// If `always_use_dst` is set to false, the return value may point into either
/// `src` (for data we couldn't compress) or into `dst` (for data we could
/// compress). If `always_use_dst` is true, the data will always be in `dst`.
/// This is a bit weird, but because of Rust's ownership rules, it's easiest
/// for a single function to always be in charge of writing to `dst`.
pub fn compress_frame<'a>(
    enc: &mut Encoder,
    checksummer: CheckSummer,
    src: &'a [u8],
    dst_chunk_header: &mut [u8],
    dst: &'a mut [u8],
    always_use_dst: bool,
) -> Result<&'a [u8], Error> {
    // This is a purely internal function, with a bunch of preconditions.
    assert!(src.len() <= MAX_BLOCK_SIZE);
    assert!(dst.len() >= max_compress_len(MAX_BLOCK_SIZE));
    assert_eq!(dst_chunk_header.len(), CHUNK_HEADER_AND_CRC_SIZE);

    // Build a checksum of our _uncompressed_ data.
    let checksum = checksummer.crc32c_masked(src);

    // Compress the buffer. If compression sucked, throw it out and
    // write uncompressed bytes instead. Since our buffer is at most
    // MAX_BLOCK_SIZE and our dst buffer has size
    // max_compress_len(MAX_BLOCK_SIZE), we have enough space.
    let compress_len = enc.compress(src, dst)?;
    let (chunk_type, chunk_len) =
        // We add 4 to the chunk_len because of the checksum.
        if compress_len >= src.len() - (src.len() / 8) {
            (ChunkType::Uncompressed, 4 + src.len())
        } else {
            (ChunkType::Compressed, 4 + compress_len)
        };

    dst_chunk_header[0] = chunk_type as u8;
    bytes::write_u24_le(chunk_len as u32, &mut dst_chunk_header[1..]);
    bytes::write_u32_le(checksum, &mut dst_chunk_header[4..]);

    // Return the data to put in our frame.
    if chunk_type == ChunkType::Compressed {
        Ok(&dst[0..compress_len])
    } else if always_use_dst {
        dst[..src.len()].copy_from_slice(src);
        Ok(&dst[..src.len()])
    } else {
        Ok(src)
    }
}