regalloc2/ion/moves.rs
1/*
2 * This file was initially derived from the files
3 * `js/src/jit/BacktrackingAllocator.h` and
4 * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was
5 * originally licensed under the Mozilla Public License 2.0. We
6 * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see
7 * https://github.com/bytecodealliance/regalloc2/issues/7).
8 *
9 * Since the initial port, the design has been substantially evolved
10 * and optimized.
11 */
12
13//! Move resolution.
14
15use super::{
16 Env, InsertMovePrio, InsertedMove, LiveRangeFlag, LiveRangeIndex, RedundantMoveEliminator,
17 VRegIndex, SLOT_NONE,
18};
19use crate::ion::data_structures::{
20 BlockparamIn, BlockparamOut, CodeRange, FixedRegFixupLevel, LiveRangeKey, PosWithPrio,
21};
22use crate::ion::reg_traversal::RegTraversalIter;
23use crate::moves::{MoveAndScratchResolver, ParallelMoves};
24use crate::{
25 Allocation, Block, Edit, Function, Inst, InstPosition, OperandConstraint, OperandKind,
26 OperandPos, PReg, ProgPoint, RegClass, SpillSlot, VReg,
27};
28use fxhash::FxHashMap;
29use smallvec::{smallvec, SmallVec};
30use std::fmt::Debug;
31
32impl<'a, F: Function> Env<'a, F> {
33 pub fn is_start_of_block(&self, pos: ProgPoint) -> bool {
34 let block = self.cfginfo.insn_block[pos.inst().index()];
35 pos == self.cfginfo.block_entry[block.index()]
36 }
37 pub fn is_end_of_block(&self, pos: ProgPoint) -> bool {
38 let block = self.cfginfo.insn_block[pos.inst().index()];
39 pos == self.cfginfo.block_exit[block.index()]
40 }
41
42 pub fn insert_move(
43 &mut self,
44 pos: ProgPoint,
45 prio: InsertMovePrio,
46 from_alloc: Allocation,
47 to_alloc: Allocation,
48 to_vreg: VReg,
49 ) {
50 trace!(
51 "insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?} to_vreg {:?}",
52 pos,
53 prio,
54 from_alloc,
55 to_alloc,
56 to_vreg
57 );
58 if let Some(from) = from_alloc.as_reg() {
59 debug_assert_eq!(from.class(), to_vreg.class());
60 }
61 if let Some(to) = to_alloc.as_reg() {
62 debug_assert_eq!(to.class(), to_vreg.class());
63 }
64 self.inserted_moves.push(InsertedMove {
65 pos_prio: PosWithPrio {
66 pos,
67 prio: prio as u32,
68 },
69 from_alloc,
70 to_alloc,
71 to_vreg,
72 });
73 }
74
75 pub fn get_alloc(&self, inst: Inst, slot: usize) -> Allocation {
76 let inst_allocs = &self.allocs[self.inst_alloc_offsets[inst.index()] as usize..];
77 inst_allocs[slot]
78 }
79
80 pub fn set_alloc(&mut self, inst: Inst, slot: usize, alloc: Allocation) {
81 let inst_allocs = &mut self.allocs[self.inst_alloc_offsets[inst.index()] as usize..];
82 inst_allocs[slot] = alloc;
83 }
84
85 pub fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation {
86 trace!("get_alloc_for_range: {:?}", range);
87 let bundle = self.ranges[range.index()].bundle;
88 trace!(" -> bundle: {:?}", bundle);
89 let bundledata = &self.bundles[bundle.index()];
90 trace!(" -> allocation {:?}", bundledata.allocation);
91 if bundledata.allocation != Allocation::none() {
92 bundledata.allocation
93 } else {
94 trace!(" -> spillset {:?}", bundledata.spillset);
95 trace!(
96 " -> spill slot {:?}",
97 self.spillsets[bundledata.spillset.index()].slot
98 );
99 self.spillslots[self.spillsets[bundledata.spillset.index()].slot.index()].alloc
100 }
101 }
102
103 pub fn apply_allocations_and_insert_moves(&mut self) {
104 trace!("apply_allocations_and_insert_moves");
105 trace!("blockparam_ins: {:?}", self.blockparam_ins);
106 trace!("blockparam_outs: {:?}", self.blockparam_outs);
107
108 // Now that all splits are done, we can pay the cost once to
109 // sort VReg range lists and update with the final ranges.
110 for vreg in &mut self.vregs {
111 for entry in &mut vreg.ranges {
112 entry.range = self.ranges[entry.index.index()].range;
113 }
114 vreg.ranges.sort_unstable_by_key(|entry| entry.range.from);
115 }
116
117 /// We create "half-moves" in order to allow a single-scan
118 /// strategy with a subsequent sort. The key idea: as our single
119 /// scan through a range for a vreg hits upon the source or
120 /// destination of an edge-move, we emit a
121 /// "half-move". These half-moves are carefully keyed in a
122 /// particular sort order (the field order below is
123 /// significant!) so that all half-moves on a given (from, to)
124 /// block-edge appear contiguously, and then all moves from a
125 /// given vreg appear contiguously. Within a given from-vreg,
126 /// pick the first `Source` (there should only be one, but
127 /// imprecision in liveranges due to loop handling sometimes
128 /// means that a blockparam-out is also recognized as a normal-out),
129 /// and then for each `Dest`, copy the source-alloc to that
130 /// dest-alloc.
131 #[derive(Clone, Debug, PartialEq, Eq)]
132 struct HalfMove {
133 key: u64,
134 alloc: Allocation,
135 }
136 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
137 #[repr(u8)]
138 enum HalfMoveKind {
139 Source = 0,
140 Dest = 1,
141 }
142 fn half_move_key(
143 from_block: Block,
144 to_block: Block,
145 to_vreg: VRegIndex,
146 kind: HalfMoveKind,
147 ) -> u64 {
148 debug_assert!(from_block.index() < 1 << 21);
149 debug_assert!(to_block.index() < 1 << 21);
150 debug_assert!(to_vreg.index() < 1 << 21);
151 ((from_block.index() as u64) << 43)
152 | ((to_block.index() as u64) << 22)
153 | ((to_vreg.index() as u64) << 1)
154 | (kind as u8 as u64)
155 }
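// Illustrative sketch of the packed key (derived from the shifts
// above; not load-bearing code): bit 0 is the kind, bits 1..22 the
// to-vreg, bits 22..43 the to-block, bits 43..64 the from-block.
// For a hypothetical edge block3 -> block7 carrying vreg 12:
//
//   let src = half_move_key(Block::new(3), Block::new(7),
//                           VRegIndex::new(12), HalfMoveKind::Source);
//   // The matching Dest key differs only in the low bit:
//   assert_eq!(src | 1, half_move_key(Block::new(3), Block::new(7),
//                                     VRegIndex::new(12), HalfMoveKind::Dest));
//
// Sorting by this key therefore places each Source immediately
// before all Dests for the same (from-block, to-block, vreg) triple.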
156 impl HalfMove {
157 fn from_block(&self) -> Block {
158 Block::new(((self.key >> 43) & ((1 << 21) - 1)) as usize)
159 }
160 fn to_block(&self) -> Block {
161 Block::new(((self.key >> 22) & ((1 << 21) - 1)) as usize)
162 }
163 fn to_vreg(&self) -> VRegIndex {
164 VRegIndex::new(((self.key >> 1) & ((1 << 21) - 1)) as usize)
165 }
166 fn kind(&self) -> HalfMoveKind {
167 if self.key & 1 == 1 {
168 HalfMoveKind::Dest
169 } else {
170 HalfMoveKind::Source
171 }
172 }
173 }
174
175 let debug_labels = self.func.debug_value_labels();
176
177 let mut half_moves: Vec<HalfMove> = Vec::with_capacity(6 * self.func.num_insts());
178 let mut reuse_input_insts = Vec::with_capacity(self.func.num_insts() / 2);
179
180 let mut blockparam_in_idx = 0;
181 let mut blockparam_out_idx = 0;
182 let mut prog_move_src_idx = 0;
183 let mut prog_move_dst_idx = 0;
184 for vreg in 0..self.vregs.len() {
185 let vreg = VRegIndex::new(vreg);
186 if !self.is_vreg_used(vreg) {
187 continue;
188 }
189
190 // For each range in each vreg, insert moves or
191 // half-moves. We also scan over `blockparam_ins` and
192 // `blockparam_outs`, which are sorted by (block, vreg),
193 // and over program-move srcs/dsts to fill in allocations.
194 let mut prev = LiveRangeIndex::invalid();
195 for range_idx in 0..self.vregs[vreg.index()].ranges.len() {
196 let entry = self.vregs[vreg.index()].ranges[range_idx];
197 let alloc = self.get_alloc_for_range(entry.index);
198 let range = entry.range;
199 trace!(
200 "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}",
201 vreg,
202 entry.index,
203 range,
204 alloc,
205 );
206 debug_assert!(alloc != Allocation::none());
207
208 if self.annotations_enabled {
209 self.annotate(
210 range.from,
211 format!(
212 " <<< start v{} in {} (range{}) (bundle{})",
213 vreg.index(),
214 alloc,
215 entry.index.index(),
216 self.ranges[entry.index.index()].bundle.raw_u32(),
217 ),
218 );
219 self.annotate(
220 range.to,
221 format!(
222 " end v{} in {} (range{}) (bundle{}) >>>",
223 vreg.index(),
224 alloc,
225 entry.index.index(),
226 self.ranges[entry.index.index()].bundle.raw_u32(),
227 ),
228 );
229 }
230
231 // Does this range follow immediately after a prior
232 // range in the same block? If so, insert a move (if
233 // the allocs differ). We do this directly rather than
234 // with half-moves because we eagerly know both sides
235 // already (and also, half-moves are specific to
236 // inter-block transfers).
237 //
238 // Note that we do *not* do this if there is also a
239 // def as the first use in the new range: it's
240 // possible that an old liverange covers the Before
241 // pos of an inst, a new liverange covers the After
242 // pos, and the def also happens at After. In this
243 // case we don't want to insert a move after the
244 // instruction copying the old liverange.
245 //
246 // Note also that we assert that the new range has to
247 // start at the Before-point of an instruction; we
248 // can't insert a move that logically happens just
249 // before After (i.e. in the middle of a single
250 // instruction).
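// Hypothetical example of the connector handled below: if v8's
// previous range ends at inst5-Before allocated in p2, and the next
// range starts at inst5-Before allocated in stack3 (and does not
// start at a def), we emit a single intra-block move here,
// conceptually:
//
//   self.insert_move(ProgPoint::before(inst5), InsertMovePrio::Regular,
//                    Allocation::reg(p2), Allocation::stack(stack3),
//                    self.vreg(v8));
//
// No half-moves are needed because both allocations are already
// known at this point in the scan.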
251 if prev.is_valid() {
252 let prev_alloc = self.get_alloc_for_range(prev);
253 let prev_range = self.ranges[prev.index()].range;
254 let first_is_def =
255 self.ranges[entry.index.index()].has_flag(LiveRangeFlag::StartsAtDef);
256 debug_assert!(prev_alloc != Allocation::none());
257
258 if prev_range.to == range.from
259 && !self.is_start_of_block(range.from)
260 && !first_is_def
261 {
262 trace!(
263 "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}",
264 prev.index(),
265 entry.index.index(),
266 prev_alloc,
267 alloc,
268 vreg.index()
269 );
270 debug_assert_eq!(range.from.pos(), InstPosition::Before);
271 self.insert_move(
272 range.from,
273 InsertMovePrio::Regular,
274 prev_alloc,
275 alloc,
276 self.vreg(vreg),
277 );
278 }
279 }
280
281 // Scan over blocks whose ends are covered by this
282 // range. For each, for each successor that is not
283 // already in this range (hence guaranteed to have the
284 // same allocation) and if the vreg is live, add a
285 // Source half-move.
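// Assumed example of the intent: if this range for v4 covers the
// ends of block2 and block3, and block3's successor block5 begins
// outside the range with v4 live-in, we record
//
//   HalfMove { key: half_move_key(block3, block5, v4, Source), alloc }
//
// and rely on the matching Dest half-move (emitted when block5's
// preds are scanned further below) to supply the destination
// allocation after the sort.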
286 let mut block = self.cfginfo.insn_block[range.from.inst().index()];
287 while block.is_valid() && block.index() < self.func.num_blocks() {
288 if range.to < self.cfginfo.block_exit[block.index()].next() {
289 break;
290 }
291 trace!("examining block with end in range: block{}", block.index());
292 for &succ in self.func.block_succs(block) {
293 trace!(
294 " -> has succ block {} with entry {:?}",
295 succ.index(),
296 self.cfginfo.block_entry[succ.index()]
297 );
298 if range.contains_point(self.cfginfo.block_entry[succ.index()]) {
299 continue;
300 }
301 trace!(" -> out of this range, requires half-move if live");
302 if self.is_live_in(succ, vreg) {
303 trace!(" -> live at input to succ, adding halfmove");
304 half_moves.push(HalfMove {
305 key: half_move_key(block, succ, vreg, HalfMoveKind::Source),
306 alloc,
307 });
308 }
309 }
310
311 // Scan forward in `blockparam_outs`, adding all
312 // half-moves for outgoing values to blockparams
313 // in succs.
314 trace!(
315 "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}",
316 vreg.index(),
317 block.index(),
318 blockparam_out_idx,
319 );
320 while blockparam_out_idx < self.blockparam_outs.len() {
321 let BlockparamOut {
322 from_vreg,
323 from_block,
324 to_block,
325 to_vreg,
326 } = self.blockparam_outs[blockparam_out_idx];
327 if (from_vreg, from_block) > (vreg, block) {
328 break;
329 }
330 if (from_vreg, from_block) == (vreg, block) {
331 trace!(
332 " -> found: from v{} block{} to v{} block{}",
333 from_vreg.index(),
334 from_block.index(),
335 to_vreg.index(),
336 to_block.index()
337 );
338 half_moves.push(HalfMove {
339 key: half_move_key(
340 from_block,
341 to_block,
342 to_vreg,
343 HalfMoveKind::Source,
344 ),
345 alloc,
346 });
347
348 if self.annotations_enabled {
349 self.annotate(
350 self.cfginfo.block_exit[block.index()],
351 format!(
352 "blockparam-out: block{} to block{}: v{} to v{} in {}",
353 from_block.index(),
354 to_block.index(),
355 from_vreg.index(),
356 to_vreg.index(),
357 alloc
358 ),
359 );
360 }
361 }
362
363 blockparam_out_idx += 1;
364 }
365
366 block = block.next();
367 }
368
369 // Scan over blocks whose beginnings are covered by
370 // this range and for which the vreg is live at the
371 // start of the block. For each, for each predecessor,
372 // add a Dest half-move.
373 let mut block = self.cfginfo.insn_block[range.from.inst().index()];
374 if self.cfginfo.block_entry[block.index()] < range.from {
375 block = block.next();
376 }
377 while block.is_valid() && block.index() < self.func.num_blocks() {
378 if self.cfginfo.block_entry[block.index()] >= range.to {
379 break;
380 }
381
382 // Add half-moves for blockparam inputs.
383 trace!(
384 "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}",
385 vreg.index(),
386 block.index(),
387 blockparam_in_idx
388 );
389 while blockparam_in_idx < self.blockparam_ins.len() {
390 let BlockparamIn {
391 from_block,
392 to_block,
393 to_vreg,
394 } = self.blockparam_ins[blockparam_in_idx];
395 if (to_vreg, to_block) > (vreg, block) {
396 break;
397 }
398 if (to_vreg, to_block) == (vreg, block) {
399 half_moves.push(HalfMove {
400 key: half_move_key(
401 from_block,
402 to_block,
403 to_vreg,
404 HalfMoveKind::Dest,
405 ),
406 alloc,
407 });
408 trace!(
409 "match: blockparam_in: v{} in block{} from block{} into {}",
410 to_vreg.index(),
411 to_block.index(),
412 from_block.index(),
413 alloc,
414 );
415 #[cfg(debug_assertions)]
416 if self.annotations_enabled {
417 self.annotate(
418 self.cfginfo.block_entry[block.index()],
419 format!(
420 "blockparam-in: block{} to block{}:into v{} in {}",
421 from_block.index(),
422 to_block.index(),
423 to_vreg.index(),
424 alloc
425 ),
426 );
427 }
428 }
429 blockparam_in_idx += 1;
430 }
431
432 if !self.is_live_in(block, vreg) {
433 block = block.next();
434 continue;
435 }
436
437 trace!(
438 "scanning preds at vreg {} block {} for ends outside the range",
439 vreg.index(),
440 block.index()
441 );
442
443 // Now find any preds whose ends are not in the
444 // same range, and insert appropriate moves.
445 for &pred in self.func.block_preds(block) {
446 trace!(
447 "pred block {} has exit {:?}",
448 pred.index(),
449 self.cfginfo.block_exit[pred.index()]
450 );
451 if range.contains_point(self.cfginfo.block_exit[pred.index()]) {
452 continue;
453 }
454 trace!(" -> requires half-move");
455 half_moves.push(HalfMove {
456 key: half_move_key(pred, block, vreg, HalfMoveKind::Dest),
457 alloc,
458 });
459 }
460
461 block = block.next();
462 }
463
464 // Scan over def/uses and apply allocations.
465 for use_idx in 0..self.ranges[entry.index.index()].uses.len() {
466 let usedata = self.ranges[entry.index.index()].uses[use_idx];
467 trace!("applying to use: {:?}", usedata);
468 debug_assert!(range.contains_point(usedata.pos));
469 let inst = usedata.pos.inst();
470 let slot = usedata.slot;
471 let operand = usedata.operand;
472 // Safepoints add virtual uses with no slots;
473 // avoid these.
474 if slot != SLOT_NONE {
475 self.set_alloc(inst, slot as usize, alloc);
476 }
477 if let OperandConstraint::Reuse(_) = operand.constraint() {
478 reuse_input_insts.push(inst);
479 }
480 }
481
482 // Scan debug-labels on this vreg that overlap with
483 // this range, producing a debug-info output record
484 // giving the allocation location for each label.
485 if !debug_labels.is_empty() {
486 // Do a binary search to find the start of any
487 // labels for this vreg. Recall that we require
488 // debug-label requests to be sorted by vreg as a
489 // precondition (which we verified above).
490 let start = debug_labels
491 .binary_search_by(|&(label_vreg, _label_from, _label_to, _label)| {
492 // Search for the point just before the first
493 // tuple that could be for `vreg` overlapping
494 // with `range`. Never return
495 // `Ordering::Equal`; `binary_search_by` in
496 // this case returns the index of the first
497 // entry that is greater as an `Err`.
498 if label_vreg.vreg() < vreg.index() {
499 std::cmp::Ordering::Less
500 } else {
501 std::cmp::Ordering::Greater
502 }
503 })
504 .unwrap_err();
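// Why `unwrap_err()` is the partition point (a sketch): the
// comparator never returns Equal, so `binary_search_by` always
// yields Err(i), where i is the index of the first entry whose
// vreg is >= this vreg. E.g., for labels on vregs [1, 1, 3, 3, 5]
// and vreg == 3, the comparator sequence is Less, Less, Greater,
// Greater, Greater, and the search returns Err(2), the first
// possible entry for vreg 3.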
505
506 for &(label_vreg, label_from, label_to, label) in &debug_labels[start..] {
507 let label_from = ProgPoint::before(label_from);
508 let label_to = ProgPoint::before(label_to);
509 let label_range = CodeRange {
510 from: label_from,
511 to: label_to,
512 };
513 if label_vreg.vreg() != vreg.index() {
514 break;
515 }
516 if !range.overlaps(&label_range) {
517 continue;
518 }
519
520 let from = std::cmp::max(label_from, range.from);
521 let to = std::cmp::min(label_to, range.to);
522
523 self.debug_locations.push((label, from, to, alloc));
524 }
525 }
526
527 // Scan over program move srcs/dsts to fill in allocations.
528
529 // Move srcs happen at `After` of a given
530 // inst. Compute [from, to) semi-inclusive range of
531 // inst indices for which we should fill in the source
532 // with this LR's allocation.
533 //
534 // range from inst-Before or inst-After covers cur
535 // inst's After; so includes move srcs from inst.
536 let move_src_start = (vreg, range.from.inst());
537 // range to (exclusive) inst-Before or inst-After
538 // covers only prev inst's After; so includes move
539 // srcs to (exclusive) inst.
540 let move_src_end = (vreg, range.to.inst());
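// Worked example under the rules above: for a range
// [inst2-Before, inst5-Before), the move sources at the After
// points of insts 2, 3 and 4 are covered, so the window is
// insts [2, 5). A range ending at inst5-After gives the same
// window: the source at inst5-After sits exactly on the
// (exclusive) end and belongs to whichever range starts there.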
541 trace!(
542 "vreg {:?} range {:?}: looking for program-move sources from {:?} to {:?}",
543 vreg,
544 range,
545 move_src_start,
546 move_src_end
547 );
548 while prog_move_src_idx < self.prog_move_srcs.len()
549 && self.prog_move_srcs[prog_move_src_idx].0 < move_src_start
550 {
551 trace!(" -> skipping idx {}", prog_move_src_idx);
552 prog_move_src_idx += 1;
553 }
554 while prog_move_src_idx < self.prog_move_srcs.len()
555 && self.prog_move_srcs[prog_move_src_idx].0 < move_src_end
556 {
557 trace!(
558 " -> setting idx {} ({:?}) to alloc {:?}",
559 prog_move_src_idx,
560 self.prog_move_srcs[prog_move_src_idx].0,
561 alloc
562 );
563 self.prog_move_srcs[prog_move_src_idx].1 = alloc;
564 prog_move_src_idx += 1;
565 }
566
567 // move dsts happen at Before point.
568 //
569 // Range from inst-Before includes cur inst, while inst-After includes only next inst.
570 let move_dst_start = if range.from.pos() == InstPosition::Before {
571 (vreg, range.from.inst())
572 } else {
573 (vreg, range.from.inst().next())
574 };
575 // Range to (exclusive) inst-Before includes prev
576 // inst, so to (exclusive) cur inst; range to
577 // (exclusive) inst-After includes cur inst, so to
578 // (exclusive) next inst.
579 let move_dst_end = if range.to.pos() == InstPosition::Before {
580 (vreg, range.to.inst())
581 } else {
582 (vreg, range.to.inst().next())
583 };
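// Worked example for the destination window: move dests live at an
// inst's Before point, so a range [inst2-After, inst6-Before)
// covers the dests of insts 3, 4 and 5; the start rounds up to
// inst 3 because inst2-Before is not covered, and the end is
// exclusive at inst 6.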
584 trace!(
585 "vreg {:?} range {:?}: looking for program-move dests from {:?} to {:?}",
586 vreg,
587 range,
588 move_dst_start,
589 move_dst_end
590 );
591 while prog_move_dst_idx < self.prog_move_dsts.len()
592 && self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_start
593 {
594 trace!(" -> skipping idx {}", prog_move_dst_idx);
595 prog_move_dst_idx += 1;
596 }
597 while prog_move_dst_idx < self.prog_move_dsts.len()
598 && self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_end
599 {
600 trace!(
601 " -> setting idx {} ({:?}) to alloc {:?}",
602 prog_move_dst_idx,
603 self.prog_move_dsts[prog_move_dst_idx].0,
604 alloc
605 );
606 self.prog_move_dsts[prog_move_dst_idx].1 = alloc;
607 prog_move_dst_idx += 1;
608 }
609
610 prev = entry.index;
611 }
612 }
613
614 // Sort the half-moves list. For each (from, to,
615 // from-vreg) tuple, find the from-alloc and all the
616 // to-allocs, and insert moves on the block edge.
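// Hypothetical shape of the sorted list for one edge, to show the
// grouping the loop below relies on (K encodes the
// (from-block, to-block, to-vreg) triple):
//
//   .., HalfMove { key: K | Source, alloc: p0 },     // the one source
//       HalfMove { key: K | Dest,   alloc: p3 },     // its dests...
//       HalfMove { key: K | Dest,   alloc: stack2 }, ..
//
// The scan picks the Source, then emits p0 -> p3 and p0 -> stack2
// at the insertion point chosen for that edge.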
617 half_moves.sort_unstable_by_key(|h| h.key);
618 trace!("halfmoves: {:?}", half_moves);
619 self.stats.halfmoves_count = half_moves.len();
620
621 let mut i = 0;
622 while i < half_moves.len() {
623 // Find a Source.
624 while i < half_moves.len() && half_moves[i].kind() != HalfMoveKind::Source {
625 i += 1;
626 }
627 if i >= half_moves.len() {
628 break;
629 }
630 let src = &half_moves[i];
631 i += 1;
632
633 // Find all Dests.
634 let dest_key = src.key | 1;
635 let first_dest = i;
636 while i < half_moves.len() && half_moves[i].key == dest_key {
637 i += 1;
638 }
639 let last_dest = i;
640
641 trace!(
642 "halfmove match: src {:?} dests {:?}",
643 src,
644 &half_moves[first_dest..last_dest]
645 );
646
647 // Determine the ProgPoint where moves on this (from, to)
648 // edge should go:
649 // - If there is more than one in-edge to `to`, then
650 // `from` must have only one out-edge; moves go at tail of
651 // `from` just before last Branch/Ret.
652 // - Otherwise, there must be at most one in-edge to `to`,
653 // and moves go at start of `to`.
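// Concrete example on a hypothetical CFG: for an edge A -> B where
// B has two predecessors and A a single successor, the moves are
// inserted just before A's final branch with OutEdgeMoves priority;
// where A has two successors and B a single predecessor, they go at
// the top of B with InEdgeMoves priority. If both endpoints have
// multiple edges, the edge is critical and the client was required
// to split it, so we panic below.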
654 let from_last_insn = self.func.block_insns(src.from_block()).last();
655 let to_first_insn = self.func.block_insns(src.to_block()).first();
656 let from_is_ret = self.func.is_ret(from_last_insn);
657 let to_is_entry = self.func.entry_block() == src.to_block();
658 let from_outs =
659 self.func.block_succs(src.from_block()).len() + if from_is_ret { 1 } else { 0 };
660 let to_ins =
661 self.func.block_preds(src.to_block()).len() + if to_is_entry { 1 } else { 0 };
662
663 let (insertion_point, prio) = if to_ins > 1 && from_outs <= 1 {
664 (
665 // N.B.: though semantically the edge moves happen
666 // after the branch, we must insert them before
667 // the branch because otherwise, of course, they
668 // would never execute. This is correct even in
669 // the presence of branches that read register
670 // inputs (e.g. conditional branches on some RISCs
671 // that branch on reg zero/not-zero, or any
672 // indirect branch), but for a very subtle reason:
673 // all cases of such branches will (or should)
674 // have multiple successors, and thus due to
675 // critical-edge splitting, their successors will
676 // have only the single predecessor, and we prefer
677 // to insert at the head of the successor in that
678 // case (rather than here). We make this a
679 // requirement, in fact: the user of this library
680 // shall not read registers in a branch
681 // instruction if there is only one successor per
682 // the given CFG information.
683 ProgPoint::before(from_last_insn),
684 InsertMovePrio::OutEdgeMoves,
685 )
686 } else if to_ins <= 1 {
687 (
688 ProgPoint::before(to_first_insn),
689 InsertMovePrio::InEdgeMoves,
690 )
691 } else {
692 panic!(
693 "Critical edge: can't insert moves between blocks {:?} and {:?}",
694 src.from_block(),
695 src.to_block()
696 );
697 };
698
699 let mut last = None;
700 for dest in first_dest..last_dest {
701 let dest = &half_moves[dest];
702 if last == Some(dest.alloc) {
703 continue;
704 }
705 self.insert_move(
706 insertion_point,
707 prio,
708 src.alloc,
709 dest.alloc,
710 self.vreg(dest.to_vreg()),
711 );
712 last = Some(dest.alloc);
713 }
714 }
715
716 // Handle multi-fixed-reg constraints by copying.
717 for fixup in std::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) {
718 let from_alloc = self.get_alloc(fixup.pos.inst(), fixup.from_slot as usize);
719 let to_alloc = Allocation::reg(PReg::from_index(fixup.to_preg.index()));
720 trace!(
721 "multi-fixed-move constraint at {:?} from {} to {} for v{}",
722 fixup.pos,
723 from_alloc,
724 to_alloc,
725 fixup.vreg.index(),
726 );
727 let prio = match fixup.level {
728 FixedRegFixupLevel::Initial => InsertMovePrio::MultiFixedRegInitial,
729 FixedRegFixupLevel::Secondary => InsertMovePrio::MultiFixedRegSecondary,
730 };
731 self.insert_move(fixup.pos, prio, from_alloc, to_alloc, self.vreg(fixup.vreg));
732 self.set_alloc(
733 fixup.pos.inst(),
734 fixup.to_slot as usize,
735 Allocation::reg(PReg::from_index(fixup.to_preg.index())),
736 );
737 }
738
739 // Handle outputs that reuse inputs: copy beforehand, then set
740 // input's alloc to output's.
741 //
742 // Note that the output's allocation may not *actually* be
743 // valid until InstPosition::After, but the reused input may
744 // occur at InstPosition::Before. This may appear incorrect,
745 // but we make it work by ensuring that all *other* inputs are
746 // extended to InstPosition::After so that the def will not
747 // interfere. (The liveness computation code does this -- we
748 // do not require the user to do so.)
749 //
750 // One might ask: why not insist that input-reusing defs occur
751 // at InstPosition::Before? this would be correct, but would
752 // mean that the reused input and the reusing output
753 // interfere, *guaranteeing* that every such case would
754 // require a move. This is really bad on ISAs (like x86) where
755 // reused inputs are ubiquitous.
756 //
757 // Another approach might be to put the def at Before, and
758 // trim the reused input's liverange back to the previous
759 // instruction's After. This is kind of OK until (i) a block
760 // boundary occurs between the prior inst and this one, or
761 // (ii) any moves/spills/reloads occur between the two
762 // instructions. We really do need the input to be live at
763 // this inst's Before.
764 //
765 // In principle what we really need is a "BeforeBefore"
766 // program point, but we don't want to introduce that
767 // everywhere and pay the cost of twice as many ProgPoints
768 // throughout the allocator.
769 //
770 // Or we could introduce a separate move instruction -- this
771 // is the approach that regalloc.rs takes with "mod" operands
772 // -- but that is also costly.
773 //
774 // So we take this approach (invented by IonMonkey -- somewhat
775 // hard to discern, though see [0] for a comment that makes
776 // this slightly less unclear) to avoid interference between
777 // the actual reused input and reusing output, ensure
778 // interference (hence no incorrectness) between other inputs
779 // and the reusing output, and not require a separate explicit
780 // move instruction.
781 //
782 // [0] https://searchfox.org/mozilla-central/rev/3a798ef9252896fb389679f06dd3203169565af0/js/src/jit/shared/Lowering-shared-inl.h#108-110
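// Hypothetical illustration of the fixup below: for an x86-style
// add at inst i with operand slots
//
//   slot 0: out v10 (constraint Reuse(1), pos Late)
//   slot 1: in  v7  (pos Early)
//   slot 2: in  v9  (pos Early)
//
// suppose slot 1 was allocated p5 and slot 0 was allocated p2. We
// insert `p5 -> p2` at inst i's Before point with ReusedInput
// priority and rewrite slot 1's recorded allocation to p2, so the
// emitted instruction reads and writes the same register as the
// reuse constraint demands.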
783 for inst in reuse_input_insts {
784 let mut input_reused: SmallVec<[usize; 4]> = smallvec![];
785 for output_idx in 0..self.func.inst_operands(inst).len() {
786 let operand = self.func.inst_operands(inst)[output_idx];
787 if let OperandConstraint::Reuse(input_idx) = operand.constraint() {
788 debug_assert!(!input_reused.contains(&input_idx));
789 debug_assert_eq!(operand.pos(), OperandPos::Late);
790 input_reused.push(input_idx);
791 let input_alloc = self.get_alloc(inst, input_idx);
792 let output_alloc = self.get_alloc(inst, output_idx);
793 trace!(
794 "reuse-input inst {:?}: output {} has alloc {:?}, input {} has alloc {:?}",
795 inst,
796 output_idx,
797 output_alloc,
798 input_idx,
799 input_alloc
800 );
801 if input_alloc != output_alloc {
802 #[cfg(debug_assertions)]
803 if self.annotations_enabled {
804 self.annotate(
805 ProgPoint::before(inst),
806 format!(" reuse-input-copy: {} -> {}", input_alloc, output_alloc),
807 );
808 }
809 let input_operand = self.func.inst_operands(inst)[input_idx];
810 self.insert_move(
811 ProgPoint::before(inst),
812 InsertMovePrio::ReusedInput,
813 input_alloc,
814 output_alloc,
815 input_operand.vreg(),
816 );
817 self.set_alloc(inst, input_idx, output_alloc);
818 }
819 }
820 }
821 }
822
823 // Sort the prog-moves lists and insert moves to reify the
824 // input program's move operations.
825 self.prog_move_srcs
826 .sort_unstable_by_key(|((_, inst), _)| *inst);
827 self.prog_move_dsts
828 .sort_unstable_by_key(|((_, inst), _)| inst.prev());
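// Why the two sort keys line up (per the invariant asserted below):
// a program move at inst i has its source keyed at inst i (the
// move's After point) and its destination keyed at inst i + 1 (the
// next Before point), so sorting destinations by `inst.prev()`
// places each destination at the same index as its matching source
// and the zip below pairs them one-to-one.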
829 let prog_move_srcs = std::mem::replace(&mut self.prog_move_srcs, vec![]);
830 let prog_move_dsts = std::mem::replace(&mut self.prog_move_dsts, vec![]);
831 debug_assert_eq!(prog_move_srcs.len(), prog_move_dsts.len());
832 for (&((_, from_inst), from_alloc), &((to_vreg, to_inst), to_alloc)) in
833 prog_move_srcs.iter().zip(prog_move_dsts.iter())
834 {
835 trace!(
836 "program move at inst {:?}: alloc {:?} -> {:?} (v{})",
837 from_inst,
838 from_alloc,
839 to_alloc,
840 to_vreg.index(),
841 );
842 debug_assert!(from_alloc.is_some());
843 debug_assert!(to_alloc.is_some());
844 debug_assert_eq!(from_inst, to_inst.prev());
845 // N.B.: these moves happen with the *same* priority as
846 // LR-to-LR moves, because they work just like them: they
847 // connect a use at one progpoint (move-After) with a def
848 // at an adjacent progpoint (move+1-Before), so they must
849 // happen in parallel with all other LR-to-LR moves.
850 self.insert_move(
851 ProgPoint::before(to_inst),
852 InsertMovePrio::Regular,
853 from_alloc,
854 to_alloc,
855 self.vreg(to_vreg),
856 );
857 }
858
859 // Sort the debug-locations vector; we provide this
860 // invariant to the client.
861 self.debug_locations.sort_unstable();
862 }
863
864 pub fn resolve_inserted_moves(&mut self) {
865 // For each program point, gather all moves together. Then
866 // resolve (see cases below).
867 let mut i = 0;
868 self.inserted_moves
869 .sort_unstable_by_key(|m| m.pos_prio.key());
870
871 // Redundant-move elimination state tracker.
872 let mut redundant_moves = RedundantMoveEliminator::default();
873
874 fn redundant_move_process_side_effects<'a, F: Function>(
875 this: &Env<'a, F>,
876 redundant_moves: &mut RedundantMoveEliminator,
877 from: ProgPoint,
878 to: ProgPoint,
879 ) {
880 // If any safepoints in range, clear and return.
881 // Also, if we cross a block boundary, clear and return.
882 if this.cfginfo.insn_block[from.inst().index()]
883 != this.cfginfo.insn_block[to.inst().index()]
884 {
885 redundant_moves.clear();
886 return;
887 }
888 for inst in from.inst().index()..=to.inst().index() {
889 if this.func.requires_refs_on_stack(Inst::new(inst)) {
890 redundant_moves.clear();
891 return;
892 }
893 }
894
895 let start_inst = if from.pos() == InstPosition::Before {
896 from.inst()
897 } else {
898 from.inst().next()
899 };
900 let end_inst = if to.pos() == InstPosition::Before {
901 to.inst()
902 } else {
903 to.inst().next()
904 };
905 for inst in start_inst.index()..end_inst.index() {
906 let inst = Inst::new(inst);
907 for (i, op) in this.func.inst_operands(inst).iter().enumerate() {
908 match op.kind() {
909 OperandKind::Def => {
910 let alloc = this.get_alloc(inst, i);
911 redundant_moves.clear_alloc(alloc);
912 }
913 _ => {}
914 }
915 }
916 for reg in this.func.inst_clobbers(inst) {
917 redundant_moves.clear_alloc(Allocation::reg(reg));
918 }
919 }
920 }
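// Hedged illustration of how this interacts with the eliminator
// (assuming it tracks facts of the form "alloc A currently holds
// the same value as alloc B"): if a move `stack3 -> p2` has been
// recorded, a repeat of `stack3 -> p2` can be elided; but if an
// intervening instruction defines into or clobbers p2, or if the
// gap crosses a block boundary or a refs-on-stack (safepoint)
// instruction, the helper above drops the affected facts (all of
// them, in the latter two cases) and the repeated move is kept.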
921
922 let mut last_pos = ProgPoint::before(Inst::new(0));
923
924 while i < self.inserted_moves.len() {
925 let start = i;
926 let pos_prio = self.inserted_moves[i].pos_prio;
927 while i < self.inserted_moves.len() && self.inserted_moves[i].pos_prio == pos_prio {
928 i += 1;
929 }
930 let moves = &self.inserted_moves[start..i];
931
932 redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos_prio.pos);
933 last_pos = pos_prio.pos;
934
935 // Gather the Int-class and Float-class moves separately.
936 // Moves between the two classes are impossible, so the two
937 // sets cannot interact and it is safe to resolve each class
938 // with its own ParallelMoves instance. (We could enhance
939 // ParallelMoves to understand register classes, but this
940 // seems simpler.)
941 let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
942 let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
943
944 for m in moves {
945 if m.from_alloc == m.to_alloc {
946 continue;
947 }
948 match m.to_vreg.class() {
949 RegClass::Int => {
950 int_moves.push(m.clone());
951 }
952 RegClass::Float => {
953 float_moves.push(m.clone());
954 }
955 }
956 }
957
958 for &(regclass, moves) in
959 &[(RegClass::Int, &int_moves), (RegClass::Float, &float_moves)]
960 {
961 // All moves in `moves` semantically happen in
962 // parallel. Let's resolve these to a sequence of moves
963 // that can be done one at a time.
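// For instance (hypothetical allocations), the parallel set
// { p0 -> p1, p1 -> p0 } contains a cycle that cannot be emitted
// as-is. After `resolve()` plus the MoveAndScratchResolver pass
// below, the cycle is broken using a scratch location obtained from
// `get_reg` (or a temporary stack slot from `get_stackslot`),
// yielding a sequence along the lines of
//
//   p0 -> scratch; p1 -> p0; scratch -> p1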
964 let mut parallel_moves = ParallelMoves::new();
965 trace!(
966 "parallel moves at pos {:?} prio {:?}",
967 pos_prio.pos,
968 pos_prio.prio
969 );
970 for m in moves {
971 trace!(" {} -> {}", m.from_alloc, m.to_alloc);
972 parallel_moves.add(m.from_alloc, m.to_alloc, Some(m.to_vreg));
973 }
974
975 let resolved = parallel_moves.resolve();
976 let mut scratch_iter = RegTraversalIter::new(
977 self.env,
978 regclass,
979 PReg::invalid(),
980 PReg::invalid(),
981 0,
982 None,
983 );
984 let key = LiveRangeKey::from_range(&CodeRange {
985 from: pos_prio.pos,
986 to: pos_prio.pos.next(),
987 });
988 let get_reg = || {
989 while let Some(preg) = scratch_iter.next() {
990 if !self.pregs[preg.index()]
991 .allocations
992 .btree
993 .contains_key(&key)
994 {
995 let alloc = Allocation::reg(preg);
996 if moves
997 .iter()
998 .any(|m| m.from_alloc == alloc || m.to_alloc == alloc)
999 {
1000 // Skip pregs used by moves in this
1001 // parallel move set, even if not
1002 // marked used at this progpoint: edge-move
1003 // liveranges meet but don't overlap,
1004 // so otherwise we could incorrectly
1005 // overwrite a source reg.
1006 continue;
1007 }
1008 return Some(alloc);
1009 }
1010 }
1011 None
1012 };
1013 let mut stackslot_idx = 0;
1014 let get_stackslot = || {
1015 let idx = stackslot_idx;
1016 stackslot_idx += 1;
1017 // We can't borrow `self` as mutable, so we create
1018 // these placeholders then allocate the actual
1019 // slots if needed with `self.allocate_spillslot`
1020 // below.
1021 Allocation::stack(SpillSlot::new(SpillSlot::MAX - idx))
1022 };
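// Sketch of the placeholder scheme: the first request hands out
// SpillSlot::MAX, the second SpillSlot::MAX - 1, and so on. After
// resolution, the `rewrites` map below replaces placeholder i with
// `extra_spillslots_by_class[regclass as usize][i]`, allocating
// that real slot first if it does not exist yet.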
1023 let is_stack_alloc = |alloc: Allocation| {
1024 if let Some(preg) = alloc.as_reg() {
1025 self.pregs[preg.index()].is_stack
1026 } else {
1027 alloc.is_stack()
1028 }
1029 };
1030 let preferred_victim = self.preferred_victim_by_class[regclass as usize];
1031
1032 let scratch_resolver = MoveAndScratchResolver::new(
1033 get_reg,
1034 get_stackslot,
1035 is_stack_alloc,
1036 preferred_victim,
1037 );
1038
1039 let resolved = scratch_resolver.compute(resolved);
1040
1041 let mut rewrites = FxHashMap::default();
1042 for i in 0..stackslot_idx {
1043 if i >= self.extra_spillslots_by_class[regclass as usize].len() {
1044 let slot =
1045 self.allocate_spillslot(self.func.spillslot_size(regclass) as u32);
1046 self.extra_spillslots_by_class[regclass as usize].push(slot);
1047 }
1048 rewrites.insert(
1049 Allocation::stack(SpillSlot::new(SpillSlot::MAX - i)),
1050 self.extra_spillslots_by_class[regclass as usize][i],
1051 );
1052 }
1053
1054 for (src, dst, to_vreg) in resolved {
1055 let src = rewrites.get(&src).cloned().unwrap_or(src);
1056 let dst = rewrites.get(&dst).cloned().unwrap_or(dst);
1057 trace!(" resolved: {} -> {} ({:?})", src, dst, to_vreg);
1058 let action = redundant_moves.process_move(src, dst, to_vreg);
1059 if !action.elide {
1060 self.add_move_edit(pos_prio, src, dst);
1061 } else {
1062 trace!(" -> redundant move elided");
1063 }
1064 }
1065 }
1066 }
1067
1068 // Ensure edits are in sorted ProgPoint order. N.B.: this must
1069 // be a stable sort! We have to keep the order produced by the
1070 // parallel-move resolver for all moves within a single sort
1071 // key.
1072 self.edits.sort_by_key(|&(pos_prio, _)| pos_prio.key());
1073 self.stats.edits_count = self.edits.len();
1074
1075 // Add debug annotations.
1076 if self.annotations_enabled {
1077 for i in 0..self.edits.len() {
1078 let &(pos_prio, ref edit) = &self.edits[i];
1079 match edit {
1080 &Edit::Move { from, to } => {
1081 self.annotate(pos_prio.pos, format!("move {} -> {}", from, to));
1082 }
1083 }
1084 }
1085 }
1086 }
1087
1088 pub fn add_move_edit(&mut self, pos_prio: PosWithPrio, from: Allocation, to: Allocation) {
1089 if from != to {
1090 if from.is_reg() && to.is_reg() {
1091 debug_assert_eq!(from.as_reg().unwrap().class(), to.as_reg().unwrap().class());
1092 }
1093 self.edits.push((pos_prio, Edit::Move { from, to }));
1094 }
1095 }
1096}