similar/algorithms/
compact.rs

1//! Implements basic compacting.  This is based on the compaction logic from
2//! diffy by Brandon Williams.
3use std::ops::Index;
4
5use crate::{DiffOp, DiffTag};
6
7use super::utils::{common_prefix_len, common_suffix_len};
8use super::DiffHook;
9
10/// Performs semantic cleanup operations on a diff.
11///
12/// This merges similar ops together but also tries to move hunks up and
13/// down the diff with the desire to connect as many hunks as possible.
14/// It still needs to be combined with [`Replace`](crate::algorithms::Replace)
15/// to get actual replace diff ops out.
16#[derive(Debug)]
17pub struct Compact<'old, 'new, Old: ?Sized, New: ?Sized, D> {
18    d: D,
19    ops: Vec<DiffOp>,
20    old: &'old Old,
21    new: &'new New,
22}
23
24impl<'old, 'new, Old, New, D> Compact<'old, 'new, Old, New, D>
25where
26    D: DiffHook,
27    Old: Index<usize> + ?Sized + 'old,
28    New: Index<usize> + ?Sized + 'new,
29    New::Output: PartialEq<Old::Output>,
30{
31    /// Creates a new compact hook wrapping another hook.
32    pub fn new(d: D, old: &'old Old, new: &'new New) -> Self {
33        Compact {
34            d,
35            ops: Vec::new(),
36            old,
37            new,
38        }
39    }
40
41    /// Extracts the inner hook.
42    pub fn into_inner(self) -> D {
43        self.d
44    }
45}
46
47impl<'old, 'new, Old: ?Sized, New: ?Sized, D: DiffHook> AsRef<D>
48    for Compact<'old, 'new, Old, New, D>
49{
50    fn as_ref(&self) -> &D {
51        &self.d
52    }
53}
54
55impl<'old, 'new, Old: ?Sized, New: ?Sized, D: DiffHook> AsMut<D>
56    for Compact<'old, 'new, Old, New, D>
57{
58    fn as_mut(&mut self) -> &mut D {
59        &mut self.d
60    }
61}
62
63impl<'old, 'new, Old, New, D> DiffHook for Compact<'old, 'new, Old, New, D>
64where
65    D: DiffHook,
66    Old: Index<usize> + ?Sized + 'old,
67    New: Index<usize> + ?Sized + 'new,
68    New::Output: PartialEq<Old::Output>,
69{
70    type Error = D::Error;
71
72    #[inline(always)]
73    fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
74        self.ops.push(DiffOp::Equal {
75            old_index,
76            new_index,
77            len,
78        });
79        Ok(())
80    }
81
82    #[inline(always)]
83    fn delete(
84        &mut self,
85        old_index: usize,
86        old_len: usize,
87        new_index: usize,
88    ) -> Result<(), Self::Error> {
89        self.ops.push(DiffOp::Delete {
90            old_index,
91            old_len,
92            new_index,
93        });
94        Ok(())
95    }
96
97    #[inline(always)]
98    fn insert(
99        &mut self,
100        old_index: usize,
101        new_index: usize,
102        new_len: usize,
103    ) -> Result<(), Self::Error> {
104        self.ops.push(DiffOp::Insert {
105            old_index,
106            new_index,
107            new_len,
108        });
109        Ok(())
110    }
111
112    fn finish(&mut self) -> Result<(), Self::Error> {
113        cleanup_diff_ops(self.old, self.new, &mut self.ops);
114        for op in &self.ops {
115            op.apply_to_hook(&mut self.d)?;
116        }
117        self.d.finish()
118    }
119}
120
121// Walks through all edits and shifts them up and then down, trying to see if
122// they run into similar edits which can be merged.
123pub fn cleanup_diff_ops<Old, New>(old: &Old, new: &New, ops: &mut Vec<DiffOp>)
124where
125    Old: Index<usize> + ?Sized,
126    New: Index<usize> + ?Sized,
127    New::Output: PartialEq<Old::Output>,
128{
129    // First attempt to compact all Deletions
130    let mut pointer = 0;
131    while let Some(&op) = ops.get(pointer) {
132        if let DiffTag::Delete = op.tag() {
133            pointer = shift_diff_ops_up(ops, old, new, pointer);
134            pointer = shift_diff_ops_down(ops, old, new, pointer);
135        }
136        pointer += 1;
137    }
138
139    // Then attempt to compact all Insertions
140    let mut pointer = 0;
141    while let Some(&op) = ops.get(pointer) {
142        if let DiffTag::Insert = op.tag() {
143            pointer = shift_diff_ops_up(ops, old, new, pointer);
144            pointer = shift_diff_ops_down(ops, old, new, pointer);
145        }
146        pointer += 1;
147    }
148}
149
150fn shift_diff_ops_up<Old, New>(
151    ops: &mut Vec<DiffOp>,
152    old: &Old,
153    new: &New,
154    mut pointer: usize,
155) -> usize
156where
157    Old: Index<usize> + ?Sized,
158    New: Index<usize> + ?Sized,
159    New::Output: PartialEq<Old::Output>,
160{
161    while let Some(&prev_op) = pointer.checked_sub(1).and_then(|idx| ops.get(idx)) {
162        let this_op = ops[pointer];
163        match (this_op.tag(), prev_op.tag()) {
164            // Shift Inserts Upwards
165            (DiffTag::Insert, DiffTag::Equal) => {
166                let suffix_len =
167                    common_suffix_len(old, prev_op.old_range(), new, this_op.new_range());
168                if suffix_len > 0 {
169                    if let Some(DiffTag::Equal) = ops.get(pointer + 1).map(|x| x.tag()) {
170                        ops[pointer + 1].grow_left(suffix_len);
171                    } else {
172                        ops.insert(
173                            pointer + 1,
174                            DiffOp::Equal {
175                                old_index: prev_op.old_range().end - suffix_len,
176                                new_index: this_op.new_range().end - suffix_len,
177                                len: suffix_len,
178                            },
179                        );
180                    }
181                    ops[pointer].shift_left(suffix_len);
182                    ops[pointer - 1].shrink_left(suffix_len);
183
184                    if ops[pointer - 1].is_empty() {
185                        ops.remove(pointer - 1);
186                        pointer -= 1;
187                    }
188                } else if ops[pointer - 1].is_empty() {
189                    ops.remove(pointer - 1);
190                    pointer -= 1;
191                } else {
192                    // We can't shift upwards anymore
193                    break;
194                }
195            }
196            // Shift Deletions Upwards
197            (DiffTag::Delete, DiffTag::Equal) => {
198                // check common suffix for the amount we can shift
199                let suffix_len =
200                    common_suffix_len(old, prev_op.old_range(), new, this_op.new_range());
201                if suffix_len != 0 {
202                    if let Some(DiffTag::Equal) = ops.get(pointer + 1).map(|x| x.tag()) {
203                        ops[pointer + 1].grow_left(suffix_len);
204                    } else {
205                        let old_range = prev_op.old_range();
206                        ops.insert(
207                            pointer + 1,
208                            DiffOp::Equal {
209                                old_index: old_range.end - suffix_len,
210                                new_index: this_op.new_range().end - suffix_len,
211                                len: old_range.len() - suffix_len,
212                            },
213                        );
214                    }
215                    ops[pointer].shift_left(suffix_len);
216                    ops[pointer - 1].shrink_left(suffix_len);
217
218                    if ops[pointer - 1].is_empty() {
219                        ops.remove(pointer - 1);
220                        pointer -= 1;
221                    }
222                } else if ops[pointer - 1].is_empty() {
223                    ops.remove(pointer - 1);
224                    pointer -= 1;
225                } else {
226                    // We can't shift upwards anymore
227                    break;
228                }
229            }
230            // Swap the Delete and Insert
231            (DiffTag::Insert, DiffTag::Delete) | (DiffTag::Delete, DiffTag::Insert) => {
232                ops.swap(pointer - 1, pointer);
233                pointer -= 1;
234            }
235            // Merge the two ranges
236            (DiffTag::Insert, DiffTag::Insert) => {
237                ops[pointer - 1].grow_right(this_op.new_range().len());
238                ops.remove(pointer);
239                pointer -= 1;
240            }
241            (DiffTag::Delete, DiffTag::Delete) => {
242                ops[pointer - 1].grow_right(this_op.old_range().len());
243                ops.remove(pointer);
244                pointer -= 1;
245            }
246            _ => unreachable!("unexpected tag"),
247        }
248    }
249    pointer
250}
251
252fn shift_diff_ops_down<Old, New>(
253    ops: &mut Vec<DiffOp>,
254    old: &Old,
255    new: &New,
256    mut pointer: usize,
257) -> usize
258where
259    Old: Index<usize> + ?Sized,
260    New: Index<usize> + ?Sized,
261    New::Output: PartialEq<Old::Output>,
262{
263    while let Some(&next_op) = pointer.checked_add(1).and_then(|idx| ops.get(idx)) {
264        let this_op = ops[pointer];
265        match (this_op.tag(), next_op.tag()) {
266            // Shift Inserts Downwards
267            (DiffTag::Insert, DiffTag::Equal) => {
268                let prefix_len =
269                    common_prefix_len(old, next_op.old_range(), new, this_op.new_range());
270                if prefix_len > 0 {
271                    if let Some(DiffTag::Equal) = pointer
272                        .checked_sub(1)
273                        .and_then(|x| ops.get(x))
274                        .map(|x| x.tag())
275                    {
276                        ops[pointer - 1].grow_right(prefix_len);
277                    } else {
278                        ops.insert(
279                            pointer,
280                            DiffOp::Equal {
281                                old_index: next_op.old_range().start,
282                                new_index: this_op.new_range().start,
283                                len: prefix_len,
284                            },
285                        );
286                        pointer += 1;
287                    }
288                    ops[pointer].shift_right(prefix_len);
289                    ops[pointer + 1].shrink_right(prefix_len);
290
291                    if ops[pointer + 1].is_empty() {
292                        ops.remove(pointer + 1);
293                    }
294                } else if ops[pointer + 1].is_empty() {
295                    ops.remove(pointer + 1);
296                } else {
297                    // We can't shift upwards anymore
298                    break;
299                }
300            }
301            // Shift Deletions Downwards
302            (DiffTag::Delete, DiffTag::Equal) => {
303                // check common suffix for the amount we can shift
304                let prefix_len =
305                    common_prefix_len(old, next_op.old_range(), new, this_op.new_range());
306                if prefix_len > 0 {
307                    if let Some(DiffTag::Equal) = pointer
308                        .checked_sub(1)
309                        .and_then(|x| ops.get(x))
310                        .map(|x| x.tag())
311                    {
312                        ops[pointer - 1].grow_right(prefix_len);
313                    } else {
314                        ops.insert(
315                            pointer,
316                            DiffOp::Equal {
317                                old_index: next_op.old_range().start,
318                                new_index: this_op.new_range().start,
319                                len: prefix_len,
320                            },
321                        );
322                        pointer += 1;
323                    }
324                    ops[pointer].shift_right(prefix_len);
325                    ops[pointer + 1].shrink_right(prefix_len);
326
327                    if ops[pointer + 1].is_empty() {
328                        ops.remove(pointer + 1);
329                    }
330                } else if ops[pointer + 1].is_empty() {
331                    ops.remove(pointer + 1);
332                } else {
333                    // We can't shift downwards anymore
334                    break;
335                }
336            }
337            // Swap the Delete and Insert
338            (DiffTag::Insert, DiffTag::Delete) | (DiffTag::Delete, DiffTag::Insert) => {
339                ops.swap(pointer, pointer + 1);
340                pointer += 1;
341            }
342            // Merge the two ranges
343            (DiffTag::Insert, DiffTag::Insert) => {
344                ops[pointer].grow_right(next_op.new_range().len());
345                ops.remove(pointer + 1);
346            }
347            (DiffTag::Delete, DiffTag::Delete) => {
348                ops[pointer].grow_right(next_op.old_range().len());
349                ops.remove(pointer + 1);
350            }
351            _ => unreachable!("unexpected tag"),
352        }
353    }
354    pointer
355}