1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
//! A hashmap with "external hashing": nodes are hashed or compared for
//! equality only with some external context provided on lookup/insert.
//! This allows very memory-efficient data structures where
//! node-internal data references some other storage (e.g., offsets into
//! an array or pool of shared data).
use hashbrown::raw::RawTable;
use std::hash::{Hash, Hasher};
/// Trait that allows for equality comparison given some external
/// context.
///
/// Note that this trait is implemented by the *context*, rather than
/// the item type, for somewhat complex lifetime reasons (lack of GATs
/// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on
/// the value type).
pub trait CtxEq<V1: ?Sized, V2: ?Sized> {
/// Determine whether `a` and `b` are equal, given the context in
/// `self` and the union-find data structure `uf`.
fn ctx_eq(&self, a: &V1, b: &V2) -> bool;
}
/// Trait that allows for hashing given some external context.
pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> {
/// Compute the hash of `value`, given the context in `self` and
/// the union-find data structure `uf`.
fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Value);
}
/// A null-comparator context type for underlying value types that
/// already have `Eq` and `Hash`.
#[derive(Default)]
pub struct NullCtx;
impl<V: Eq + Hash> CtxEq<V, V> for NullCtx {
fn ctx_eq(&self, a: &V, b: &V) -> bool {
a.eq(b)
}
}
impl<V: Eq + Hash> CtxHash<V> for NullCtx {
fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &V) {
value.hash(state);
}
}
/// A bucket in the hash table.
///
/// Some performance-related design notes: we cache the hashcode for
/// speed, as this often buys a few percent speed in
/// interning-table-heavy workloads. We only keep the low 32 bits of
/// the hashcode, for memory efficiency: in common use, `K` and `V`
/// are often 32 bits also, and a 12-byte bucket is measurably better
/// than a 16-byte bucket.
struct BucketData<K, V> {
hash: u32,
k: K,
v: V,
}
/// A HashMap that takes external context for all operations.
pub struct CtxHashMap<K, V> {
raw: RawTable<BucketData<K, V>>,
}
impl<K, V> CtxHashMap<K, V> {
/// Create an empty hashmap with pre-allocated space for the given
/// capacity.
pub fn with_capacity(capacity: usize) -> Self {
Self {
raw: RawTable::with_capacity(capacity),
}
}
}
fn compute_hash<Ctx, K>(ctx: &Ctx, k: &K) -> u32
where
Ctx: CtxHash<K>,
{
let mut hasher = crate::fx::FxHasher::default();
ctx.ctx_hash(&mut hasher, k);
hasher.finish() as u32
}
impl<K, V> CtxHashMap<K, V> {
/// Insert a new key-value pair, returning the old value associated
/// with this key (if any).
pub fn insert<Ctx>(&mut self, k: K, v: V, ctx: &Ctx) -> Option<V>
where
Ctx: CtxEq<K, K> + CtxHash<K>,
{
let hash = compute_hash(ctx, &k);
match self.raw.find(hash as u64, |bucket| {
hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k)
}) {
Some(bucket) => {
let data = unsafe { bucket.as_mut() };
Some(std::mem::replace(&mut data.v, v))
}
None => {
let data = BucketData { hash, k, v };
self.raw
.insert_entry(hash as u64, data, |bucket| bucket.hash as u64);
None
}
}
}
/// Look up a key, returning a borrow of the value if present.
pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V>
where
Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>,
{
let hash = compute_hash(ctx, k);
self.raw
.find(hash as u64, |bucket| {
hash == bucket.hash && ctx.ctx_eq(&bucket.k, k)
})
.map(|bucket| {
let data = unsafe { bucket.as_ref() };
&data.v
})
}
}
#[cfg(test)]
mod test {
use super::*;
use std::hash::Hash;
#[derive(Clone, Copy, Debug)]
struct Key {
index: u32,
}
struct Ctx {
vals: &'static [&'static str],
}
impl CtxEq<Key, Key> for Ctx {
fn ctx_eq(&self, a: &Key, b: &Key) -> bool {
self.vals[a.index as usize].eq(self.vals[b.index as usize])
}
}
impl CtxHash<Key> for Ctx {
fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Key) {
self.vals[value.index as usize].hash(state);
}
}
#[test]
fn test_basic() {
let ctx = Ctx {
vals: &["a", "b", "a"],
};
let k0 = Key { index: 0 };
let k1 = Key { index: 1 };
let k2 = Key { index: 2 };
assert!(ctx.ctx_eq(&k0, &k2));
assert!(!ctx.ctx_eq(&k0, &k1));
assert!(!ctx.ctx_eq(&k2, &k1));
let mut map: CtxHashMap<Key, u64> = CtxHashMap::with_capacity(4);
assert_eq!(map.insert(k0, 42, &ctx), None);
assert_eq!(map.insert(k2, 84, &ctx), Some(42));
assert_eq!(map.get(&k1, &ctx), None);
assert_eq!(*map.get(&k0, &ctx).unwrap(), 84);
}
}