regalloc2/
moves.rs

/*
 * Released under the terms of the Apache 2.0 license with LLVM
 * exception. See `LICENSE` for details.
 */

use crate::{ion::data_structures::u64_key, Allocation, PReg};
use smallvec::{smallvec, SmallVec};
use std::fmt::Debug;

/// A list of moves to be performed in sequence, with auxiliary data
/// attached to each.
pub type MoveVec<T> = SmallVec<[(Allocation, Allocation, T); 16]>;

/// A list of moves to be performed in sequence, like a
/// `MoveVec<T>`, except that a not-yet-chosen scratch space may
/// occur as well, represented by `Allocation::none()`.
#[derive(Clone, Debug)]
pub enum MoveVecWithScratch<T> {
    /// No scratch was actually used.
    NoScratch(MoveVec<T>),
    /// A scratch space was used.
    Scratch(MoveVec<T>),
}

/// A `ParallelMoves` represents a list of alloc-to-alloc moves that
/// must happen in parallel -- i.e., all reads of sources semantically
/// happen before all writes of destinations, and destinations are
/// allowed to overwrite sources. It can compute a list of sequential
/// moves that will produce the equivalent data movement, possibly
/// using a scratch register if one is necessary.
pub struct ParallelMoves<T: Clone + Copy + Default> {
    parallel_moves: MoveVec<T>,
}

impl<T: Clone + Copy + Default> ParallelMoves<T> {
    pub fn new() -> Self {
        Self {
            parallel_moves: smallvec![],
        }
    }

    pub fn add(&mut self, from: Allocation, to: Allocation, t: T) {
        self.parallel_moves.push((from, to, t));
    }

    fn sources_overlap_dests(&self) -> bool {
        // Assumes `parallel_moves` has already been sorted in `resolve()` below.
        for &(_, dst, _) in &self.parallel_moves {
            if self
                .parallel_moves
                .binary_search_by_key(&dst, |&(src, _, _)| src)
                .is_ok()
            {
                return true;
            }
        }
        false
    }

    /// Resolve the parallel-moves problem to a sequence of separate
    /// moves, such that the combined effect of the sequential moves
    /// is as-if all of the moves added to this `ParallelMoves`
    /// resolver happened in parallel.
    ///
    /// Sometimes, if there is a cycle, a scratch register is
    /// necessary to allow the moves to occur sequentially. In this
    /// case, `Allocation::none()` appears in the returned move list
    /// to represent the scratch register. The caller may choose to
    /// keep a separate scratch register free so that this placeholder
    /// can be trivially rewritten, or may dynamically search for or
    /// create a free register as needed, if none are available.
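    ///
    /// A minimal sketch of expected behavior (hypothetical register
    /// allocations; not runnable outside this crate):
    ///
    /// ```ignore
    /// let r0 = Allocation::reg(PReg::new(0, RegClass::Int));
    /// let r1 = Allocation::reg(PReg::new(1, RegClass::Int));
    /// let mut pm = ParallelMoves::new();
    /// pm.add(r0, r1, ()); // r1 := r0
    /// pm.add(r1, r0, ()); // r0 := r1 -- a cycle, so a scratch is needed
    /// assert!(pm.resolve().needs_scratch());
    /// ```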
    pub fn resolve(mut self) -> MoveVecWithScratch<T> {
        // Easy case: zero or one move. Just return our vec.
        if self.parallel_moves.len() <= 1 {
            return MoveVecWithScratch::NoScratch(self.parallel_moves);
        }

        // Sort moves by source so that we can efficiently test for
        // presence.
        self.parallel_moves
            .sort_by_key(|&(src, dst, _)| u64_key(src.bits(), dst.bits()));

        // Do any dests overlap sources? If not, we can also just
        // return the list.
        if !self.sources_overlap_dests() {
            return MoveVecWithScratch::NoScratch(self.parallel_moves);
        }

        // General case: some moves overwrite dests that other moves
        // read as sources. We'll use a general algorithm.
        //
        // *Important property*: because we expect that each register
        // has only one writer (otherwise the effect of the parallel
        // move is undefined), each move must be ordered before at
        // most one other move: the unique move, if any, that
        // overwrites the first move's source. Thus, we *can only have
        // simple cycles* (those that are a ring of nodes, i.e., with
        // only one path from a node back to itself); there are no
        // SCCs that are more complex than that. We leverage this fact
        // below to avoid having to do a full Tarjan SCC DFS (with
        // lowest-index computation, etc.): instead, as soon as we
        // find a cycle, we know we have the full cycle and we can do
        // a cyclic move sequence and continue.
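        //
        // For instance (hypothetical registers), the parallel set
        // `{ A := B, B := A }` is a two-node ring, and
        // `{ A := B, B := C, C := A }` is a three-node ring. A chain
        // such as `D := A` can feed into a ring, but no move can lie
        // on two rings at once: that would require its source to have
        // two writers.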

        // Sort moves by destination and check that each destination
        // has only one writer.
        self.parallel_moves.sort_by_key(|&(_, dst, _)| dst);
        if cfg!(debug_assertions) {
            let mut last_dst = None;
            for &(_, dst, _) in &self.parallel_moves {
                if last_dst.is_some() {
                    debug_assert!(last_dst.unwrap() != dst);
                }
                last_dst = Some(dst);
            }
        }

        // Construct a mapping from move indices to moves they must
        // come before. Any given move must come before the move that
        // overwrites its source; we have moves sorted by dest above
        // so we can efficiently find such a move, if any.
        let mut must_come_before: SmallVec<[Option<usize>; 16]> =
            smallvec![None; self.parallel_moves.len()];
        for (i, &(src, _, _)) in self.parallel_moves.iter().enumerate() {
            if let Ok(move_to_dst_idx) = self
                .parallel_moves
                .binary_search_by_key(&src, |&(_, dst, _)| dst)
            {
                must_come_before[i] = Some(move_to_dst_idx);
            }
        }
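
        // For example (hypothetical allocations): with the moves
        // sorted by destination as
        //
        //     0: A := C
        //     1: B := A
        //     2: C := B
        //
        // we get `must_come_before = [Some(2), Some(0), Some(1)]`:
        // move 1 reads A, which move 0 overwrites, so move 1 must be
        // emitted before move 0, and likewise around the ring.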

        // Do a simple stack-based DFS and emit moves in postorder,
        // then reverse at the end for RPO. Unlike Tarjan's SCC
        // algorithm, we can emit a cycle as soon as we find one, as
        // noted above.
        let mut ret: MoveVec<T> = smallvec![];
        let mut stack: SmallVec<[usize; 16]> = smallvec![];
        let mut visited: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
        let mut onstack: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
        let mut scratch_used = false;

        stack.push(0);
        onstack[0] = true;
        loop {
            if stack.is_empty() {
                if let Some(next) = visited.iter().position(|&flag| !flag) {
                    stack.push(next);
                    onstack[next] = true;
                } else {
                    break;
                }
            }

            let top = *stack.last().unwrap();
            visited[top] = true;
            match must_come_before[top] {
                None => {
                    ret.push(self.parallel_moves[top]);
                    onstack[top] = false;
                    stack.pop();
                    while let Some(top) = stack.pop() {
                        ret.push(self.parallel_moves[top]);
                        onstack[top] = false;
                    }
                }
                Some(next) if visited[next] && !onstack[next] => {
                    ret.push(self.parallel_moves[top]);
                    onstack[top] = false;
                    stack.pop();
                    while let Some(top) = stack.pop() {
                        ret.push(self.parallel_moves[top]);
                        onstack[top] = false;
                    }
                }
                Some(next) if !visited[next] && !onstack[next] => {
                    stack.push(next);
                    onstack[next] = true;
                    continue;
                }
                Some(next) => {
                    // Found a cycle -- emit a cyclic-move sequence
                    // for the cycle on the top of stack, then normal
                    // moves below it. Recall that these moves will be
                    // reversed in sequence, so from the original
                    // parallel move set
                    //
                    //     { B := A, C := B, A := C }
                    //
                    // we will generate something like:
                    //
                    //     A := scratch
                    //     B := A
                    //     C := B
                    //     scratch := C
                    //
                    // which will become:
                    //
                    //     scratch := C
                    //     C := B
                    //     B := A
                    //     A := scratch
                    let mut last_dst = None;
                    let mut scratch_src = None;
                    while let Some(move_idx) = stack.pop() {
                        onstack[move_idx] = false;
                        let (mut src, dst, dst_t) = self.parallel_moves[move_idx];
                        if last_dst.is_none() {
                            scratch_src = Some(src);
                            src = Allocation::none();
                            scratch_used = true;
                        } else {
                            debug_assert_eq!(last_dst.unwrap(), src);
                        }
                        ret.push((src, dst, dst_t));

                        last_dst = Some(dst);

                        if move_idx == next {
                            break;
                        }
                    }
                    if let Some(src) = scratch_src {
                        ret.push((src, Allocation::none(), T::default()));
                    }
                }
            }
        }

        ret.reverse();

        if scratch_used {
            MoveVecWithScratch::Scratch(ret)
        } else {
            MoveVecWithScratch::NoScratch(ret)
        }
    }
}

impl<T> MoveVecWithScratch<T> {
    /// Fills in the scratch space, if needed, with the given
    /// register/allocation and returns a final list of moves. The
    /// scratch register must not occur anywhere in the parallel-move
    /// problem given to the resolver that produced this
    /// `MoveVecWithScratch`.
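    ///
    /// A minimal usage sketch (hypothetical values: `resolved` is the
    /// output of `ParallelMoves::resolve()`, and `r2` is a register
    /// known to be free at this program point):
    ///
    /// ```ignore
    /// let seq = resolved.with_scratch(r2);
    /// // Every `Allocation::none()` placeholder in `resolved` has
    /// // been replaced by `r2` in `seq`.
    /// ```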
    pub fn with_scratch(self, scratch: Allocation) -> MoveVec<T> {
        match self {
            MoveVecWithScratch::NoScratch(moves) => moves,
            MoveVecWithScratch::Scratch(mut moves) => {
                for (src, dst, _) in &mut moves {
                    debug_assert!(
                        *src != scratch && *dst != scratch,
                        "Scratch register should not also be an actual source or dest of moves"
                    );
                    debug_assert!(
                        !(src.is_none() && dst.is_none()),
                        "Move resolution should not have produced a scratch-to-scratch move"
                    );
                    if src.is_none() {
                        *src = scratch;
                    }
                    if dst.is_none() {
                        *dst = scratch;
                    }
                }
                moves
            }
        }
    }

    /// Unwrap without a scratch register.
    pub fn without_scratch(self) -> Option<MoveVec<T>> {
        match self {
            MoveVecWithScratch::NoScratch(moves) => Some(moves),
            MoveVecWithScratch::Scratch(..) => None,
        }
    }

    /// Do we need a scratch register?
    pub fn needs_scratch(&self) -> bool {
        match self {
            MoveVecWithScratch::NoScratch(..) => false,
            MoveVecWithScratch::Scratch(..) => true,
        }
    }

    /// Do any moves go from stack to stack?
    pub fn stack_to_stack(&self, is_stack_alloc: impl Fn(Allocation) -> bool) -> bool {
        match self {
            MoveVecWithScratch::NoScratch(moves) | MoveVecWithScratch::Scratch(moves) => moves
                .iter()
                .any(|&(src, dst, _)| is_stack_alloc(src) && is_stack_alloc(dst)),
        }
    }
}

/// Final stage of move resolution: finding or using scratch
/// registers, creating them if necessary by using stackslots, and
/// ensuring that the final list of moves contains no stack-to-stack
/// moves.
///
/// The resolved list of moves may need one or two scratch registers,
/// and maybe a stackslot, to ensure these conditions. Our general
/// strategy proceeds in two steps.
///
/// First, we find a scratch register, so we only have to worry about
/// a list of moves, all with real locations as src and dest. If we're
/// lucky and there are any registers not allocated at this
/// program-point, we can use a real register. Otherwise, we use an
/// extra stackslot. This is fine, because at this step,
/// stack-to-stack moves are still OK.
///
/// Then, we resolve stack-to-stack moves into stack-to-reg /
/// reg-to-stack pairs. For this, we try to allocate a second free
/// register. If none is available, we create another scratch
/// stackslot, pick a "victim" register in the appropriate class, and
/// resolve into: victim -> extra-stackslot; stack-src -> victim;
/// victim -> stack-dst; extra-stackslot -> victim.
///
/// Sometimes move elision will be able to clean this up a bit. But,
/// for simplicity, we keep the concerns separated and always do the
/// full expansion above.
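///
/// A minimal construction sketch (the closures, `extra_slot`, and
/// `victim_preg` are hypothetical stand-ins for what the allocator
/// supplies):
///
/// ```ignore
/// let resolver = MoveAndScratchResolver::new(
///     || None,                          // no free registers here
///     || Allocation::stack(extra_slot), // mint a scratch stackslot
///     |alloc| alloc.is_stack(),         // stack-allocation test
///     victim_preg,                      // last-ditch victim PReg
/// );
/// let final_moves = resolver.compute(resolved_moves);
/// ```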
pub struct MoveAndScratchResolver<GetReg, GetStackSlot, IsStackAlloc>
where
    GetReg: FnMut() -> Option<Allocation>,
    GetStackSlot: FnMut() -> Allocation,
    IsStackAlloc: Fn(Allocation) -> bool,
{
    /// Scratch register for stack-to-stack move expansion.
    stack_stack_scratch_reg: Option<Allocation>,
    /// Stackslot into which we need to save the stack-to-stack
    /// scratch reg before doing any stack-to-stack moves, if we stole
    /// the reg.
    stack_stack_scratch_reg_save: Option<Allocation>,
    /// Closure that finds us a PReg at the current location.
    find_free_reg: GetReg,
    /// Closure that gets us a stackslot, if needed.
    get_stackslot: GetStackSlot,
    /// Closure to determine whether an `Allocation` refers to a stack slot.
    is_stack_alloc: IsStackAlloc,
    /// The victim PReg to evict to another stackslot at every
    /// stack-to-stack move if a free PReg is not otherwise
    /// available. Provided by caller and statically chosen. This is a
    /// very last-ditch option, so static choice is OK.
    victim: PReg,
}

impl<GetReg, GetStackSlot, IsStackAlloc> MoveAndScratchResolver<GetReg, GetStackSlot, IsStackAlloc>
where
    GetReg: FnMut() -> Option<Allocation>,
    GetStackSlot: FnMut() -> Allocation,
    IsStackAlloc: Fn(Allocation) -> bool,
{
    pub fn new(
        find_free_reg: GetReg,
        get_stackslot: GetStackSlot,
        is_stack_alloc: IsStackAlloc,
        victim: PReg,
    ) -> Self {
        Self {
            stack_stack_scratch_reg: None,
            stack_stack_scratch_reg_save: None,
            find_free_reg,
            get_stackslot,
            is_stack_alloc,
            victim,
        }
    }

    pub fn compute<T: Debug + Copy>(mut self, moves: MoveVecWithScratch<T>) -> MoveVec<T> {
        // Fast path: do we have a vec with no use of a scratch
        // register and no stack-to-stack moves? If so, return it
        // unchanged.
        if !moves.needs_scratch() && !moves.stack_to_stack(&self.is_stack_alloc) {
            return moves.without_scratch().unwrap();
        }

        let mut result = smallvec![];

        // Now, find a scratch allocation in order to resolve cycles.
        let scratch = (self.find_free_reg)().unwrap_or_else(|| (self.get_stackslot)());
        trace!("scratch resolver: scratch alloc {:?}", scratch);

        let moves = moves.with_scratch(scratch);
        for &(src, dst, data) in &moves {
            // Do we have a stack-to-stack move? If so, resolve.
            if (self.is_stack_alloc)(src) && (self.is_stack_alloc)(dst) {
                trace!("scratch resolver: stack to stack: {:?} -> {:?}", src, dst);
                // Lazily allocate a stack-to-stack scratch.
                if self.stack_stack_scratch_reg.is_none() {
                    if let Some(reg) = (self.find_free_reg)() {
                        trace!(
                            "scratch resolver: have free stack-to-stack scratch preg: {:?}",
                            reg
                        );
                        self.stack_stack_scratch_reg = Some(reg);
                    } else {
                        self.stack_stack_scratch_reg = Some(Allocation::reg(self.victim));
                        self.stack_stack_scratch_reg_save = Some((self.get_stackslot)());
                        trace!(
                            "scratch resolver: stack-to-stack using victim {:?} with save stackslot {:?}",
                            self.stack_stack_scratch_reg,
                            self.stack_stack_scratch_reg_save
                        );
                    }
                }

                // If we have a "victimless scratch", then do a
                // stack-to-scratch / scratch-to-stack sequence.
                if self.stack_stack_scratch_reg_save.is_none() {
                    result.push((src, self.stack_stack_scratch_reg.unwrap(), data));
                    result.push((self.stack_stack_scratch_reg.unwrap(), dst, data));
                }
                // Otherwise, save the current value in the
                // stack-to-stack scratch reg (which is our victim) to
                // the extra stackslot, then do the stack-to-scratch /
                // scratch-to-stack sequence, then restore it.
                else {
                    result.push((
                        self.stack_stack_scratch_reg.unwrap(),
                        self.stack_stack_scratch_reg_save.unwrap(),
                        data,
                    ));
                    result.push((src, self.stack_stack_scratch_reg.unwrap(), data));
                    result.push((self.stack_stack_scratch_reg.unwrap(), dst, data));
                    result.push((
                        self.stack_stack_scratch_reg_save.unwrap(),
                        self.stack_stack_scratch_reg.unwrap(),
                        data,
                    ));
                }
            } else {
                // Normal move.
                result.push((src, dst, data));
            }
        }

        trace!("scratch resolver: got {:?}", result);
        result
    }
}
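
#[cfg(test)]
mod tests {
    use super::*;
    use crate::RegClass;

    // A sanity-check sketch (not part of the original source):
    // resolving a two-register swap must request a scratch, and
    // filling the scratch in yields the classic three-move sequence.
    #[test]
    fn swap_cycle_uses_scratch() {
        let r0 = Allocation::reg(PReg::new(0, RegClass::Int));
        let r1 = Allocation::reg(PReg::new(1, RegClass::Int));

        let mut pm = ParallelMoves::new();
        pm.add(r0, r1, ()); // r1 := r0
        pm.add(r1, r0, ()); // r0 := r1 -- forms a two-node cycle
        let resolved = pm.resolve();
        assert!(resolved.needs_scratch());

        // Substitute a third register (assumed free here) for the
        // `Allocation::none()` placeholder.
        let scratch = Allocation::reg(PReg::new(2, RegClass::Int));
        let seq = resolved.with_scratch(scratch);

        // Expect: scratch := one side; move across; restore from
        // scratch -- three moves in total.
        assert_eq!(seq.len(), 3);
        assert!(seq.iter().any(|&(src, _, _)| src == scratch));
        assert!(seq.iter().any(|&(_, dst, _)| dst == scratch));
    }
}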