parity_db/btree/
iter.rs

1// Copyright 2021-2022 Parity Technologies (UK) Ltd.
2// This file is dual-licensed as Apache-2.0 or MIT.
3
4/// BTree iterator implementation.
5/// This does not enforce consistency.
6/// If a commit did happen after an
7/// iterator is created, the iterator will
8/// just replay a `seek` operation at its
9/// latest accessed key.u
10use super::*;
11use crate::{
12	btree::BTreeTable, db::CommitOverlay, error::Result, log::LogQuery, parking_lot::RwLock,
13	table::key::TableKeyQuery,
14};
15
16#[derive(Debug, PartialEq, Eq, Clone, Copy)]
17pub enum SeekTo<'a> {
18	Include(&'a [u8]),
19	Exclude(&'a [u8]),
20	Last,
21}
22
23impl<'a> SeekTo<'a> {
24	pub fn key(&self) -> Option<&'a [u8]> {
25		match self {
26			SeekTo::Include(key) => Some(key),
27			SeekTo::Exclude(key) => Some(key),
28			SeekTo::Last => None,
29		}
30	}
31}
32
33#[derive(Debug)]
34pub struct BTreeIterator<'a> {
35	table: &'a BTreeTable,
36	log: &'a RwLock<crate::log::LogOverlays>,
37	commit_overlay: &'a RwLock<Vec<CommitOverlay>>,
38	iter: BtreeIterBackend,
39	col: ColId,
40	pending_backend: Option<PendingBackend>,
41	last_key: LastKey,
42}
43
44type IterResult = Result<Option<(Vec<u8>, Vec<u8>)>>;
45
46#[derive(Debug)]
47struct PendingBackend {
48	next_item: Option<(Vec<u8>, Vec<u8>)>,
49	direction: IterDirection,
50}
51
52#[derive(Debug)]
53pub enum LastKey {
54	Start,
55	End,
56	At(Vec<u8>),
57	Seeked(Vec<u8>),
58}
59
60#[derive(Debug)]
61pub enum LastIndex {
62	Seeked(usize),
63	At(usize),
64	Before(usize),
65	Descend(usize),
66}
67
68#[derive(Debug)]
69pub struct BtreeIterBackend(BTree, BTreeIterState);
70
71impl<'a> BTreeIterator<'a> {
72	pub(crate) fn new(
73		table: &'a BTreeTable,
74		col: ColId,
75		log: &'a RwLock<crate::log::LogOverlays>,
76		commit_overlay: &'a RwLock<Vec<CommitOverlay>>,
77	) -> Result<Self> {
78		let record_id = log.read().last_record_id(col);
79		let tree = table.with_locked(|btree| BTree::open(btree, log, record_id))?;
80		let iter = BTreeIterState::new(tree.record_id);
81		Ok(BTreeIterator {
82			table,
83			iter: BtreeIterBackend(tree, iter),
84			col,
85			pending_backend: None,
86			last_key: LastKey::Start,
87			log,
88			commit_overlay,
89		})
90	}
91
92	pub fn seek(&mut self, key: &[u8]) -> Result<()> {
93		// seek require log do not change
94		let log = self.log.read();
95		let record_id = log.last_record_id(self.col);
96		self.last_key = LastKey::Seeked(key.to_vec());
97		self.pending_backend = None;
98		self.seek_backend(SeekTo::Include(key), record_id, self.table, &*log)
99	}
100
101	pub fn seek_to_first(&mut self) -> Result<()> {
102		self.seek(&[])
103	}
104
105	pub fn seek_to_last(&mut self) -> Result<()> {
106		let log = self.log.read();
107		let record_id = log.last_record_id(self.col);
108		self.last_key = LastKey::End;
109		self.seek_backend_to_last(record_id, self.table, &*log)
110	}
111
112	#[allow(clippy::should_implement_trait)]
113	pub fn next(&mut self) -> IterResult {
114		self.iter_inner(IterDirection::Forward)
115	}
116
117	pub fn prev(&mut self) -> IterResult {
118		self.iter_inner(IterDirection::Backward)
119	}
120
121	fn iter_inner(&mut self, direction: IterDirection) -> IterResult {
122		let col = self.col;
123
124		loop {
125			// Lock log over function call (no btree struct change).
126			let commit_overlay = self.commit_overlay.read();
127			let next_commit_overlay =
128				commit_overlay.get(col as usize).and_then(|o| match direction {
129					IterDirection::Forward => o.btree_next(&self.last_key),
130					IterDirection::Backward => o.btree_prev(&self.last_key),
131				});
132			let log = self.log.read();
133			let record_id = log.last_record_id(self.col);
134			// No consistency over iteration, allows dropping lock to overlay.
135			drop(commit_overlay);
136			if record_id != self.iter.1.record_id {
137				self.pending_backend = None;
138			}
139			let next_from_pending = self
140				.pending_backend
141				.take()
142				.and_then(|pending| (pending.direction == direction).then_some(pending.next_item));
143			let next_backend = if let Some(pending) = next_from_pending {
144				pending
145			} else {
146				self.next_backend(record_id, self.table, &*log, direction)?
147			};
148			let result = match (next_commit_overlay, next_backend) {
149				(Some((commit_key, commit_value)), Some((backend_key, backend_value))) => {
150					match (direction, commit_key.value().cmp(&backend_key)) {
151						(IterDirection::Backward, std::cmp::Ordering::Greater) |
152						(IterDirection::Forward, std::cmp::Ordering::Less) => {
153							self.pending_backend = Some(PendingBackend {
154								next_item: Some((backend_key, backend_value)),
155								direction,
156							});
157							if let Some(value) = commit_value {
158								Some((commit_key.value().clone(), value.value().clone()))
159							} else {
160								self.last_key = LastKey::At(commit_key.value().clone());
161								continue
162							}
163						},
164						(IterDirection::Backward, std::cmp::Ordering::Less) |
165						(IterDirection::Forward, std::cmp::Ordering::Greater) => Some((backend_key, backend_value)),
166						(_, std::cmp::Ordering::Equal) =>
167							if let Some(value) = commit_value {
168								Some((backend_key, value.value().clone()))
169							} else {
170								self.last_key = LastKey::At(commit_key.value().clone());
171								continue
172							},
173					}
174				},
175				(Some((commit_key, Some(commit_value))), None) => {
176					self.pending_backend = Some(PendingBackend { next_item: None, direction });
177					Some((commit_key.value().clone(), commit_value.value().clone()))
178				},
179				(Some((k, None)), None) => {
180					self.pending_backend = Some(PendingBackend { next_item: None, direction });
181					self.last_key = LastKey::At(k.value().clone());
182					continue
183				},
184				(None, Some((backend_key, backend_value))) => Some((backend_key, backend_value)),
185				(None, None) => {
186					self.pending_backend = Some(PendingBackend { next_item: None, direction });
187					None
188				},
189			};
190
191			match result.as_ref() {
192				Some((key, _)) => {
193					self.last_key = LastKey::At(key.clone());
194				},
195				None =>
196					self.last_key = match direction {
197						IterDirection::Backward => LastKey::Start,
198						IterDirection::Forward => LastKey::End,
199					},
200			}
201			return Ok(result)
202		}
203	}
204
205	fn next_backend(
206		&mut self,
207		record_id: u64,
208		col: &BTreeTable,
209		log: &impl LogQuery,
210		direction: IterDirection,
211	) -> Result<Option<(Vec<u8>, Value)>> {
212		let BtreeIterBackend(tree, iter) = &mut self.iter;
213		if record_id != tree.record_id {
214			let new_tree = col.with_locked(|btree| BTree::open(btree, log, record_id))?;
215			*tree = new_tree;
216			match &self.last_key {
217				LastKey::At(last_key) => {
218					iter.seek(SeekTo::Exclude(last_key.as_slice()), tree, col, log)?;
219				},
220				LastKey::Seeked(last_key) => {
221					iter.seek(SeekTo::Include(last_key.as_slice()), tree, col, log)?;
222				},
223				LastKey::Start => {
224					iter.seek(SeekTo::Include(&[]), tree, col, log)?;
225				},
226				LastKey::End => {
227					iter.seek_to_last(tree, col, log)?;
228				},
229			}
230			iter.record_id = record_id;
231		}
232
233		iter.next(tree, col, log, direction)
234	}
235
236	fn seek_backend(
237		&mut self,
238		seek_to: SeekTo,
239		record_id: u64,
240		col: &BTreeTable,
241		log: &impl LogQuery,
242	) -> Result<()> {
243		let BtreeIterBackend(tree, iter) = &mut self.iter;
244		if record_id != tree.record_id {
245			let new_tree = col.with_locked(|btree| BTree::open(btree, log, record_id))?;
246			*tree = new_tree;
247			iter.record_id = record_id;
248		}
249		iter.seek(seek_to, tree, col, log)
250	}
251
252	fn seek_backend_to_last(
253		&mut self,
254		record_id: u64,
255		col: &BTreeTable,
256		log: &impl LogQuery,
257	) -> Result<()> {
258		let BtreeIterBackend(tree, iter) = &mut self.iter;
259		if record_id != tree.record_id {
260			let new_tree = col.with_locked(|btree| BTree::open(btree, log, record_id))?;
261			*tree = new_tree;
262			iter.record_id = record_id;
263		}
264		iter.seek_to_last(tree, col, log)
265	}
266}
267
268#[derive(Debug)]
269pub struct BTreeIterState {
270	state: Vec<(LastIndex, Node)>,
271	pub record_id: u64,
272}
273
274impl BTreeIterState {
275	pub fn new(record_id: u64) -> BTreeIterState {
276		BTreeIterState { state: vec![], record_id }
277	}
278
279	fn exit(&mut self, direction: IterDirection) -> bool {
280		while !self.state.is_empty() {
281			self.state.pop();
282			if let Some((ix, node)) = self.state.last_mut() {
283				debug_assert!(matches!(ix, LastIndex::Descend(_)));
284				if let LastIndex::Descend(child) = ix {
285					*ix = match direction {
286						IterDirection::Backward if *child == 0 => continue,
287						IterDirection::Forward
288							if *child == ORDER || node.separators[*child].separator.is_none() =>
289							continue,
290						_ => LastIndex::Before(*child),
291					};
292				} else {
293					self.state.clear(); // should actually be unreachable
294				}
295				return false
296			}
297		}
298		true
299	}
300
301	pub fn next(
302		&mut self,
303		btree: &mut BTree,
304		col: &BTreeTable,
305		log: &impl LogQuery,
306		direction: IterDirection,
307	) -> Result<Option<(Vec<u8>, Value)>> {
308		if self.state.is_empty() {
309			let root = col.with_locked(|tables| {
310				BTree::fetch_root(btree.root_index.unwrap_or(NULL_ADDRESS), tables, log)
311			})?;
312			self.state.push((node_start(&root, direction, btree.depth == 0), root));
313		}
314
315		loop {
316			let is_leaf = btree.depth as usize + 1 == self.state.len();
317			if let Some(state) = self.state.last_mut() {
318				let next = match (direction, &state.0) {
319					(_, LastIndex::Descend(sep)) => LastIndex::Descend(*sep),
320					(_, LastIndex::Seeked(sep)) => LastIndex::At(*sep),
321
322					(IterDirection::Forward, LastIndex::At(sep))
323						if is_leaf && *sep + 1 == ORDER =>
324						if self.exit(direction) {
325							break
326						} else {
327							continue
328						},
329					(IterDirection::Forward, LastIndex::At(sep)) if is_leaf =>
330						LastIndex::At(*sep + 1),
331					(IterDirection::Forward, LastIndex::At(sep)) => LastIndex::Descend(*sep + 1),
332					(IterDirection::Forward, LastIndex::Before(sep)) if *sep == ORDER => {
333						if self.exit(direction) {
334							break
335						} else {
336							continue
337						}
338					},
339					(IterDirection::Forward, LastIndex::Before(sep)) => LastIndex::At(*sep),
340
341					(IterDirection::Backward, LastIndex::At(sep)) if is_leaf && *sep == 0 => {
342						if self.exit(direction) {
343							break
344						} else {
345							continue
346						}
347					},
348					(IterDirection::Backward, LastIndex::At(sep)) if is_leaf =>
349						LastIndex::At(*sep - 1),
350					(IterDirection::Backward, LastIndex::At(sep)) => LastIndex::Descend(*sep),
351					(IterDirection::Backward, LastIndex::Before(sep)) if *sep == 0 => {
352						if self.exit(direction) {
353							break
354						} else {
355							continue
356						}
357					},
358					(IterDirection::Backward, LastIndex::Before(sep)) => LastIndex::At(*sep - 1),
359				};
360				match next {
361					LastIndex::At(at) =>
362						if let Some(address) = state.1.separator_address(at) {
363							state.0 = LastIndex::At(at);
364							let key = state.1.separator_key(at).unwrap();
365							let key_query = TableKeyQuery::Fetch(None);
366							let r = col.get_at_value_index(key_query, address, log)?;
367							return Ok(r.map(|r| (key, r.1)))
368						} else if self.exit(direction) {
369							break
370						},
371					LastIndex::Descend(child_ix) => {
372						if let Some(child) =
373							col.with_locked(|btree| state.1.fetch_child(child_ix, btree, log))?
374						{
375							if let Some((ix, _)) = self.state.last_mut() {
376								*ix = LastIndex::Descend(child_ix);
377							}
378							let is_child_leaf = btree.depth as usize == self.state.len();
379							self.state.push((node_start(&child, direction, is_child_leaf), child))
380						} else if self.exit(direction) {
381							break
382						}
383					},
384					_ => unreachable!(),
385				}
386			} else {
387				break
388			}
389		}
390
391		Ok(None)
392	}
393
394	pub fn seek(
395		&mut self,
396		seek_to: SeekTo,
397		btree: &mut BTree,
398		col: &BTreeTable,
399		log: &impl LogQuery,
400	) -> Result<()> {
401		self.state.clear();
402		col.with_locked(|b| {
403			let root = BTree::fetch_root(btree.root_index.unwrap_or(NULL_ADDRESS), b, log)?;
404			Node::seek(root, seek_to, b, log, btree.depth, &mut self.state)
405		})
406	}
407
408	pub fn seek_to_last(
409		&mut self,
410		btree: &mut BTree,
411		col: &BTreeTable,
412		log: &impl LogQuery,
413	) -> Result<()> {
414		self.seek(SeekTo::Last, btree, col, log)
415	}
416}
417
418fn node_start(node: &Node, direction: IterDirection, is_leaf: bool) -> LastIndex {
419	let ix = match direction {
420		IterDirection::Forward => 0,
421		IterDirection::Backward => node.last_separator_index().map(|i| i + 1).unwrap_or(0),
422	};
423
424	if is_leaf {
425		LastIndex::Before(ix)
426	} else {
427		LastIndex::Descend(ix)
428	}
429}