#![allow(clippy::undocumented_unsafe_blocks)]
#![allow(unsafe_code)]

use crate::cast::cast;
use crate::program::{InstructionFormat, InstructionSet, InstructionSetKind, Opcode, ParsingVisitor, RawReg, UNUSED_RAW_OPCODE};
use crate::utils::{Bitness, BitnessT, GasVisitorT, B64};
use alloc::string::String;
use alloc::vec;

#[cfg(feature = "simd")]
use picosimd::amd64::{
    avx2::i8x32,
    avx2_composite::{i16x32, i32x32},
    sse::i8x16,
};

#[cfg(not(feature = "simd"))]
use picosimd::fallback::{i16x32, i32x32, i8x16, i8x32};

#[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
macro_rules! unsafe_avx2 {
    ($($t:tt)*) => { $($t)* }
}

#[cfg(all(feature = "simd", target_arch = "x86_64"))]
macro_rules! unsafe_avx2 {
    ($($t:tt)*) => { unsafe { $($t)* } }
}

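/// Which level of the data cache loads are assumed to hit; used by `load_cost`
/// to pick a load latency for the simulated machine.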
#[derive(Copy, Clone, Debug, Hash)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum CacheModel {
    L1Hit,
    L2Hit,
    L3Hit,
}

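// Parameters of the modeled out-of-order core: decode width per cycle, reorder
// buffer capacity, and the slack subtracted from the raw cycle count when the
// block cost is taken in `take_block_cost`.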
const MAX_DECODE_PER_CYCLE: u32 = 4;

const REORDER_BUFFER_SIZE: usize = 32;

const GAS_COST_SLACK: i32 = 3;

#[derive(Copy, Clone, Debug)]
pub struct InstCost {
    pub latency: i8,
    pub decode_slots: u32,
    pub alu_slots: u32,
    pub mul_slots: u32,
    pub div_slots: u32,
    pub load_slots: u32,
    pub store_slots: u32,
}

const MAX_ALU_SLOTS: u32 = 4;
const MAX_LOAD_SLOTS: u32 = 4;
const MAX_STORE_SLOTS: u32 = 4;
const MAX_MUL_SLOTS: u32 = 1;
const MAX_DIV_SLOTS: u32 = 1;

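/// The number of bits needed to represent any value in `0..=value`,
/// e.g. `bits_needed(4) == 3` and `bits_needed(1) == 1`.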
const fn bits_needed(value: u32) -> u32 {
    (value + 1).next_power_of_two().ilog2()
}

const ALU_BITS: u32 = bits_needed(MAX_ALU_SLOTS);
const LOAD_BITS: u32 = bits_needed(MAX_LOAD_SLOTS);
const STORE_BITS: u32 = bits_needed(MAX_STORE_SLOTS);
const MUL_BITS: u32 = bits_needed(MAX_MUL_SLOTS);
const DIV_BITS: u32 = bits_needed(MAX_DIV_SLOTS);

#[allow(clippy::int_plus_one)]
const _: () = {
    assert!((1 << ALU_BITS) - 1 >= MAX_ALU_SLOTS);
    assert!((1 << LOAD_BITS) - 1 >= MAX_LOAD_SLOTS);
    assert!((1 << STORE_BITS) - 1 >= MAX_STORE_SLOTS);
    assert!((1 << MUL_BITS) - 1 >= MAX_MUL_SLOTS);
    assert!((1 << DIV_BITS) - 1 >= MAX_DIV_SLOTS);
};

const ALU_OFFSET: u32 = 0;
const LOAD_OFFSET: u32 = ALU_OFFSET + ALU_BITS + 1;
const STORE_OFFSET: u32 = LOAD_OFFSET + LOAD_BITS + 1;
const MUL_OFFSET: u32 = STORE_OFFSET + STORE_BITS + 1;
const DIV_OFFSET: u32 = MUL_OFFSET + MUL_BITS + 1;

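// Each resource field in the packed resource word is followed by one spare
// "guard" bit (the `+ 1` in the offsets above). When the required resources
// are subtracted from the available ones, an underflowing field borrows from
// its guard bit and clears it, so "all guard bits still set" means "enough of
// every resource". With the defaults (4 ALU/load/store slots, 1 mul, 1 div)
// the value bits sit at 0..=2, 4..=6, 8..=10, 12 and 14, and the guard bits at
// 3, 7, 11, 13 and 15, i.e. `RESOURCES_UNDERFLOW_MASK == 0xA888`.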
const RESOURCES_UNDERFLOW_MASK: u32 = (1 << (ALU_BITS + ALU_OFFSET))
    | (1 << (LOAD_BITS + LOAD_OFFSET))
    | (1 << (STORE_BITS + STORE_OFFSET))
    | (1 << (MUL_BITS + MUL_OFFSET))
    | (1 << (DIV_BITS + DIV_OFFSET));

#[cfg(all(test, feature = "logging"))]
struct DebugResources(u32);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugResources {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.debug_struct("Resources")
            .field("alu", &((self.0 >> ALU_OFFSET) & ((1 << ALU_BITS) - 1)))
            .field("load", &((self.0 >> LOAD_OFFSET) & ((1 << LOAD_BITS) - 1)))
            .field("store", &((self.0 >> STORE_OFFSET) & ((1 << STORE_BITS) - 1)))
            .field("mul", &((self.0 >> MUL_OFFSET) & ((1 << MUL_BITS) - 1)))
            .field("div", &((self.0 >> DIV_OFFSET) & ((1 << DIV_BITS) - 1)))
            .finish()
    }
}

#[cfg(all(test, feature = "logging"))]
struct DebugDeps([i32; 32]);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugDeps {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.write_str("{")?;
        let iter = self.0.into_iter().enumerate().filter(|(_, deps)| *deps != 0);
        let mut remaining = iter.clone().count();
        for (nth, mut deps) in iter {
            write!(fmt, "{nth}={{")?;
            while deps != 0 {
                let slot = deps.trailing_zeros();
                deps &= !(1 << slot);
                write!(fmt, "{slot}")?;
                if deps != 0 {
                    fmt.write_str(",")?;
                }
            }
            fmt.write_str("}")?;
            remaining -= 1;
            if remaining > 0 {
                fmt.write_str(", ")?;
            }
        }
        fmt.write_str("}")?;

        Ok(())
    }
}

#[cfg(all(test, feature = "logging"))]
struct DebugMask([i8; 32]);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugMask {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.write_str("{")?;
        let iter = self.0.into_iter().enumerate().filter(|(_, mask)| *mask != 0);
        let mut remaining = iter.clone().count();
        for (nth, mask) in iter {
            if mask == 0 {
                continue;
            } else if mask == -1 {
                write!(fmt, "{nth}")?;
            } else {
                write!(fmt, "{nth}={{{mask}}}")?;
            }

            remaining -= 1;
            if remaining > 0 {
                fmt.write_str(", ")?;
            }
        }
        fmt.write_str("}")?;

        Ok(())
    }
}

#[cfg(all(test, feature = "logging"))]
struct DebugEntryByRegister([i8; 16]);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugEntryByRegister {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.write_str("{")?;
        let mut remaining = self.0.iter().filter(|&&entry| entry != -1).count();
        for (reg, entry) in crate::program::Reg::ALL.into_iter().zip(self.0.into_iter()) {
            if entry == -1 {
                continue;
            }

            write!(fmt, "{reg}={entry}")?;
            remaining -= 1;
            if remaining > 0 {
                fmt.write_str(", ")?;
            }
        }
        fmt.write_str("}")?;

        Ok(())
    }
}

#[cfg(all(test, feature = "logging"))]
struct DebugCyclesRemaining([i8; 32]);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugCyclesRemaining {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.write_str("{")?;
        let mut remaining = self.0.len();
        for (index, count) in self.0.into_iter().enumerate() {
            write!(fmt, "{index}={count}")?;
            remaining -= 1;
            if remaining > 0 {
                fmt.write_str(", ")?;
            }
        }
        fmt.write_str("}")?;

        Ok(())
    }
}

#[cfg(all(test, feature = "logging"))]
struct DebugState([i8; 32]);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugState {
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.write_str("{")?;
        let iter = self.0.into_iter().enumerate().filter(|(_, state)| *state != 0);
        let mut remaining = iter.clone().count();
        for (nth, state) in iter {
            remaining -= 1;
            let state = match state {
                1 => 'D',
                2 => 'w',
                3 => 'e',
                4 => 'X',
                _ => {
                    write!(fmt, "{nth}={state}")?;
                    if remaining > 0 {
                        fmt.write_str(", ")?;
                    }
                    continue;
                }
            };

            write!(fmt, "{nth}={state}")?;
            if remaining > 0 {
                fmt.write_str(", ")?;
            }
        }
        fmt.write_str("}")?;

        Ok(())
    }
}

impl InstCost {
    #[inline(always)]
    const fn resources(&self) -> u32 {
        assert!(self.alu_slots <= MAX_ALU_SLOTS);
        assert!(self.mul_slots <= MAX_MUL_SLOTS);
        assert!(self.div_slots <= MAX_DIV_SLOTS);
        assert!(self.load_slots <= MAX_LOAD_SLOTS);
        assert!(self.store_slots <= MAX_STORE_SLOTS);

        (self.alu_slots << ALU_OFFSET)
            | (self.load_slots << LOAD_OFFSET)
            | (self.store_slots << STORE_OFFSET)
            | (self.mul_slots << MUL_OFFSET)
            | (self.div_slots << DIV_OFFSET)
    }
}

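/// A zero-resource, zero-latency cost template; spread (`..EMPTY_COST`) into
/// the `InstCost` literals below so each site only names the fields it overrides.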
const EMPTY_COST: InstCost = InstCost {
    latency: 0,
    decode_slots: 1,
    alu_slots: 0,
    mul_slots: 0,
    div_slots: 0,
    load_slots: 0,
    store_slots: 0,
};

#[derive(Copy, Clone, Debug)]
pub enum EventKind {
    Decode,
    WaitingForDependencies,
    Executing,
    Executed,
    WaitingForRetirement,
    Retired,
}

impl From<EventKind> for char {
    fn from(kind: EventKind) -> char {
        match kind {
            EventKind::Decode => 'D',
            EventKind::WaitingForDependencies => '=',
            EventKind::Executing => 'e',
            EventKind::Executed => 'E',
            EventKind::WaitingForRetirement => '-',
            EventKind::Retired => 'R',
        }
    }
}

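/// Observer hooked into the simulation. `SHOULD_CALL_ON_EVENT` is a const so
/// that tracing support compiles out entirely for the no-op `()` tracer, and
/// `should_enable_fast_forward` lets a tracer opt out of cycle skipping so it
/// still sees every intermediate cycle.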
pub trait Tracer: Sized {
    const SHOULD_CALL_ON_EVENT: bool;

    fn should_enable_fast_forward(&self) -> bool {
        true
    }

    fn on_event(&mut self, _cycle: u32, _instruction: u32, _event: EventKind) {}
}

impl Tracer for () {
    const SHOULD_CALL_ON_EVENT: bool = false;
}

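/// A simple out-of-order core model used to estimate per-basic-block gas
/// costs. Instructions are decoded into a 32-entry reorder buffer; per-slot
/// state, remaining latency, packed resource requirements and dependency
/// bitmasks are kept in SIMD vectors so a whole cycle can be stepped with a
/// handful of vector operations.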
pub struct Simulator<'a, B, T: Tracer = ()> {
    code: &'a [u8],
    /// Cycles elapsed so far for the current basic block.
    cycles: u32,
    /// Instructions dispatched so far for the current basic block.
    instructions: u32,
    /// Set once a block-terminating instruction has been dispatched.
    finished: bool,
    decode_slots_remaining_this_cycle: u32,
    /// Packed per-resource counters (see the `*_OFFSET` constants).
    resources_available: u32,
    instructions_in_flight: u32,
    reorder_buffer_head: u32,
    reorder_buffer_tail: u32,
    /// Instruction index occupying each reorder-buffer slot (tracing only).
    rob_instruction: [u32; REORDER_BUFFER_SIZE],
    /// Per-slot state: 0 = empty, 1 = decoding, 2 = waiting, 3 = executing, 4 = executed.
    rob_state: i8x32,
    rob_cycles_remaining: i8x32,
    rob_required_resources: i16x32,
    /// Per-slot bitmask of the slots this entry still waits on.
    rob_dependencies: i32x32,
    /// Per-slot bitmask of the slots waiting on this entry.
    rob_depended_by: i32x32,
    /// Per-slot bitmask of the registers this entry writes.
    registers_written_by_rob_entry: i16x32,
    /// For each register, the slot that last wrote it, or -1 if none.
    rob_entry_by_register: i8x16,
    cache_model: CacheModel,
    force_branch_is_cheap: Option<bool>,

    opcode_trap: u8,
    opcode_unlikely: u8,

    tracer: T,
    _phantom: core::marker::PhantomData<B>,
}

impl<'a, B, T> Simulator<'a, B, T>
where
    T: Tracer,
    B: BitnessT,
{
    pub fn new(code: &'a [u8], isa: InstructionSetKind, cache_model: CacheModel, tracer: T) -> Self {
        unsafe_avx2! {
            let mut simulator = Simulator {
                code,
                rob_instruction: [0; REORDER_BUFFER_SIZE],
                cycles: 0,
                instructions: 0,
                finished: false,
                decode_slots_remaining_this_cycle: 0,
                resources_available: 0,
                rob_state: i8x32::zero(),
                rob_cycles_remaining: i8x32::zero(),
                rob_required_resources: i16x32::zero(),
                rob_dependencies: i32x32::zero(),
                rob_depended_by: i32x32::zero(),
                registers_written_by_rob_entry: i16x32::zero(),
                rob_entry_by_register: i8x16::zero(),
                reorder_buffer_tail: 0,
                cache_model,
                tracer,
                force_branch_is_cheap: None,
                instructions_in_flight: 0,
                reorder_buffer_head: 0,
                opcode_trap: isa.opcode_to_u8(Opcode::trap).unwrap_or(UNUSED_RAW_OPCODE),
                opcode_unlikely: isa.opcode_to_u8(Opcode::unlikely).unwrap_or(UNUSED_RAW_OPCODE),
                _phantom: core::marker::PhantomData,
            };

            simulator.clear();
            simulator
        }
    }

    pub fn set_force_branch_is_cheap(&mut self, value: Option<bool>) {
        self.force_branch_is_cheap = value;
    }

    fn clear(&mut self) {
        self.cycles = 0;
        self.instructions = 0;
        self.finished = false;
        self.instructions_in_flight = 0;
        self.decode_slots_remaining_this_cycle = MAX_DECODE_PER_CYCLE;
        self.resources_available = InstCost {
            alu_slots: MAX_ALU_SLOTS,
            mul_slots: MAX_MUL_SLOTS,
            div_slots: MAX_DIV_SLOTS,
            load_slots: MAX_LOAD_SLOTS,
            store_slots: MAX_STORE_SLOTS,
            ..EMPTY_COST
        }
        .resources()
            | RESOURCES_UNDERFLOW_MASK;

        self.reorder_buffer_tail = 0;
        self.reorder_buffer_head = 0;

        unsafe_avx2! {
            self.rob_entry_by_register = i8x16::negative_one();
            self.rob_state = i8x32::zero();
            self.rob_cycles_remaining = i8x32::zero();
            self.rob_required_resources = i16x32::zero();
            self.rob_dependencies = i32x32::zero();
            self.rob_depended_by = i32x32::zero();
            self.registers_written_by_rob_entry = i16x32::zero();
        }

        if T::SHOULD_CALL_ON_EVENT {
            self.rob_instruction.fill(0);
        }
    }

    fn emit_event(&mut self, slot: u32, kind: EventKind) {
        if T::SHOULD_CALL_ON_EVENT {
            self.tracer.on_event(self.cycles, self.rob_instruction[cast(slot).to_usize()], kind);
        }
    }

    fn tick_cycle<const FAST_FORWARD: bool>(&mut self) {
        unsafe_avx2! {
            self.tick_cycle_avx2::<FAST_FORWARD>();
        }
    }

    #[cfg_attr(all(feature = "simd", target_arch = "x86_64"), target_feature(enable = "avx2"))]
    #[inline(never)]
    fn emit_events_avx2(&mut self, mask: i8x32, event_kind: EventKind) {
        if !T::SHOULD_CALL_ON_EVENT {
            return;
        }

        let mut bits = mask.most_significant_bits();
        while bits != 0 {
            let slot = bits.trailing_zeros();
            self.emit_event(slot, event_kind);
            bits &= !(1 << slot);
        }
    }

    fn instructions_in_flight(&self) -> u32 {
        self.instructions_in_flight
    }

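    /// Advances the simulated core by one cycle (or, with `FAST_FORWARD`, by
    /// as many cycles as every in-flight instruction can safely skip at once).
    /// Phases, in order: retire a contiguous run of executed entries at the
    /// ROB head, issue up to five ready entries whose resources fit, count
    /// down the latency of executing entries and release their resources and
    /// register writes, then promote freshly decoded entries to the waiting
    /// state and refill the decode slots for the next cycle.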
    #[cfg_attr(all(feature = "simd", target_arch = "x86_64"), target_feature(enable = "avx2"))]
    fn tick_cycle_avx2<const FAST_FORWARD: bool>(&mut self) {
        let state_decoding = i8x32::splat(1);
        let state_waiting = i8x32::splat(2);
        let state_executing = i8x32::splat(3);
        let state_executed = i8x32::splat(4);

        #[cfg(test)]
        let original_state = self.rob_state;
        #[cfg(test)]
        let original_cycles_remaining = self.rob_cycles_remaining;
        #[cfg(test)]
        let original_dependencies = self.rob_dependencies;
        #[cfg(test)]
        let original_depended_by = self.rob_depended_by;
        #[cfg(test)]
        let original_entry_by_register = self.rob_entry_by_register;
        #[cfg(test)]
        let original_required_resources = self.rob_required_resources;
        #[cfg(test)]
        let original_decode_slots = self.decode_slots_remaining_this_cycle;
        #[cfg(test)]
        let original_reorder_buffer_head = self.reorder_buffer_head;
        #[cfg(test)]
        let original_resources_available = self.resources_available;
        #[cfg(test)]
        let original_instructions_in_flight = self.instructions_in_flight;

        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: state={:?}",
            self.cycles,
            DebugState(self.rob_state.to_array())
        );
        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: cycles={:?}",
            self.cycles,
            DebugCyclesRemaining(self.rob_cycles_remaining.to_array())
        );
        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: dependencies={:?}",
            self.cycles,
            DebugDeps(self.rob_dependencies.to_array())
        );
        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: depended_by={:?}",
            self.cycles,
            DebugDeps(self.rob_depended_by.to_array())
        );
        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: entry_by_register={:?}",
            self.cycles,
            DebugEntryByRegister(self.rob_entry_by_register.to_array())
        );
        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: resources_available={:?}",
            self.cycles,
            DebugResources(self.resources_available)
        );

        debug_assert_eq!(
            self.rob_state.simd_eq(i8x32::zero()).most_significant_bits().count_zeros(),
            self.instructions_in_flight
        );

        // Retire a contiguous run of executed entries starting at the ROB head.
        {
            let is_waiting_for_retirement: i8x32 = self.rob_state.simd_eq(state_executed);
            let leading_count_to_retire = is_waiting_for_retirement
                .most_significant_bits()
                .rotate_right(self.reorder_buffer_head)
                .trailing_ones() as i32;

            let is_retired_this_cycle = i8x32::from_i1x32_sext(
                (cast(1_u64 << leading_count_to_retire).truncate_to_u32().wrapping_sub(1)).rotate_left(self.reorder_buffer_head) as i32,
            );

            self.rob_dependencies = self
                .rob_dependencies
                .and_not(i32x32::splat(is_retired_this_cycle.most_significant_bits()));

            self.rob_depended_by = self.rob_depended_by.and_not(i32x32::from_i8x32_sext(is_retired_this_cycle));

            self.rob_state = self.rob_state.and_not(is_retired_this_cycle);

            let retired_count = is_retired_this_cycle.most_significant_bits().count_ones();
            #[cfg(all(test, feature = "logging"))]
            if retired_count > 0 {
                log::debug!(
                    "tick_cycle_avx2[{}]: instructions_in_flight: {} -> {}",
                    self.cycles,
                    self.instructions_in_flight,
                    self.instructions_in_flight - retired_count
                );
            }

            self.instructions_in_flight -= retired_count;
            self.reorder_buffer_head = (self.reorder_buffer_head + retired_count) % (REORDER_BUFFER_SIZE as u32);

            self.emit_events_avx2(is_retired_this_cycle, EventKind::Retired);
            self.emit_events_avx2(
                is_waiting_for_retirement.and_not(is_retired_this_cycle),
                EventKind::WaitingForRetirement,
            );

            debug_assert_eq!(
                self.rob_state.simd_eq(i8x32::zero()).most_significant_bits().count_zeros(),
                self.instructions_in_flight
            );
        }

        // Issue up to five waiting entries whose dependencies are satisfied
        // and whose resource requirements fit into what is available.
        {
            const RESOURCES_UNDERFLOW_MASK_I16: i16 = RESOURCES_UNDERFLOW_MASK as u16 as i16;
            let is_executed: i8x32 = self.rob_cycles_remaining.simd_lt(i8x32::splat(1));
            let is_executed_mask: i32 = is_executed.most_significant_bits();
            let has_no_dependencies: i8x32 = (self.rob_dependencies.and_not(i32x32::splat(is_executed_mask)))
                .simd_eq(i32x32::zero())
                .clamp_to_i8_range();

            let mut is_waiting_to_start: i8x32 = self.rob_state.simd_eq(state_waiting) & has_no_dependencies;

            for _ in 0..5 {
                #[cfg(all(test, feature = "logging"))]
                if is_waiting_to_start.most_significant_bits() != 0 {
                    log::debug!(
                        "tick_cycle_avx2[{}]: is_waiting_to_start={:?}",
                        self.cycles,
                        DebugMask(is_waiting_to_start.to_array())
                    );
                }
                debug_assert_eq!(self.resources_available & RESOURCES_UNDERFLOW_MASK, RESOURCES_UNDERFLOW_MASK);

                let new_resources: i16x32 = i16x32::splat(self.resources_available as i16) - self.rob_required_resources;
                let have_enough_resources: i8x32 = (new_resources.and(i16x32::splat(RESOURCES_UNDERFLOW_MASK_I16)))
                    .simd_eq(i16x32::splat(RESOURCES_UNDERFLOW_MASK_I16))
                    .clamp_to_i8_range();
                let have_enough_resources = have_enough_resources.and(is_waiting_to_start);
                let mask = have_enough_resources.most_significant_bits().rotate_right(self.reorder_buffer_head);
                let position = mask.trailing_zeros();
                if position != 32 {
                    let position = (position + self.reorder_buffer_head) % (REORDER_BUFFER_SIZE as u32);
                    #[cfg(all(test, feature = "logging"))]
                    log::debug!(
                        "tick_cycle_avx2[{}]: starting: instruction={}, slot={}",
                        self.cycles,
                        self.rob_instruction[cast(position).to_usize()],
                        position,
                    );

                    let resources_consumed = self.rob_required_resources.as_slice()[cast(position).to_usize()];
                    self.resources_available -= resources_consumed as u32;
                    self.rob_state.as_slice_mut()[cast(position).to_usize()] += 1;
                    is_waiting_to_start.as_slice_mut()[cast(position).to_usize()] = 0;
                }
            }
            self.emit_events_avx2(self.rob_state.simd_eq(state_waiting), EventKind::WaitingForDependencies);
        }

        // Count down the latency of executing entries; when one finishes,
        // release its resources and its register writes.
        let mut cycle_count = 1;
        {
            let is_executing: i8x32 = self.rob_state.simd_eq(state_executing);
            if FAST_FORWARD {
                let max_cycles =
                    ((self.rob_cycles_remaining & is_executing) | (is_executing ^ i8x32::negative_one())).horizontal_min_unsigned();
                let max_cycles = cast(max_cycles).to_signed();

                #[cfg(all(test, feature = "logging"))]
                log::debug!("tick_cycle_avx2[{}]: max_cycles={}", self.cycles, max_cycles);
                if max_cycles > 0 && self.decode_slots_remaining_this_cycle == MAX_DECODE_PER_CYCLE {
                    cycle_count = max_cycles;
                }
            }

            self.rob_cycles_remaining = self.rob_cycles_remaining.saturating_sub(i8x32::splat(cycle_count) & is_executing);

            let is_execution_finished: i8x32 = self.rob_cycles_remaining.simd_eq(i8x32::zero()) & is_executing;
            let is_execution_finished = is_execution_finished.to_i16x32_sext();

            #[cfg(all(test, feature = "logging"))]
            log::debug!(
                "tick_cycle_avx2[{}]: is_execution_finished={:?}",
                self.cycles,
                is_execution_finished
            );

            let retired_register_writes: i16 = (self.registers_written_by_rob_entry & is_execution_finished).bitwise_reduce();
            self.registers_written_by_rob_entry = self.registers_written_by_rob_entry.and_not(is_execution_finished);
            self.rob_entry_by_register = self.rob_entry_by_register.or(i8x16::from_i1x16_sext(retired_register_writes));

            let resources_released = cast((self.rob_required_resources & is_execution_finished).wrapping_reduce()).to_unsigned();
            self.resources_available += u32::from(resources_released);
            self.rob_required_resources = self.rob_required_resources.and_not(is_execution_finished);

            let is_last_cycle = self.rob_cycles_remaining.simd_eq(i8x32::negative_one());
            let has_cycles_remaining = self.rob_cycles_remaining.simd_gt(i8x32::negative_one());
            self.rob_state += i8x32::splat(1) & is_executing.and(is_last_cycle);
            self.emit_events_avx2(is_executing.and(is_last_cycle), EventKind::Executed);
            self.emit_events_avx2(is_executing.and(has_cycles_remaining), EventKind::Executing);
        }

        // Promote freshly decoded entries to the waiting state.
        {
            let is_decoding = self.rob_state.simd_eq(state_decoding);
            self.rob_state += i8x32::splat(1) & is_decoding;
        }

        self.decode_slots_remaining_this_cycle = MAX_DECODE_PER_CYCLE;
        self.cycles += cast(i32::from(cycle_count)).to_unsigned();

        #[cfg(all(test, feature = "logging"))]
        {
            if self.rob_state != original_state {
                log::debug!("tick_cycle_avx2[{}]: state changed!", self.cycles);
            } else {
                log::debug!("tick_cycle_avx2[{}]: state did NOT change!", self.cycles);
            }
        }

        #[cfg(test)]
        {
            assert!(
                self.instructions_in_flight != original_instructions_in_flight
                    || self.reorder_buffer_head != original_reorder_buffer_head
                    || self.decode_slots_remaining_this_cycle != original_decode_slots
                    || self.resources_available != original_resources_available
                    || self.rob_state != original_state
                    || self.rob_cycles_remaining.max_signed(i8x32::negative_one())
                        != original_cycles_remaining.max_signed(i8x32::negative_one())
                    || self.rob_dependencies != original_dependencies
                    || self.rob_depended_by != original_depended_by
                    || self.rob_entry_by_register != original_entry_by_register
                    || self.rob_required_resources != original_required_resources,
                "made no progress"
            );
        }
    }

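    /// Burns cycles until the current instruction can be decoded: it needs
    /// `decode_slots` free decode slots this cycle and a free reorder-buffer
    /// entry.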
    #[inline(always)]
    fn tick_cycle_if_cannot_decode(&mut self, decode_slots: u32) {
        let mut should_tick =
            self.decode_slots_remaining_this_cycle < decode_slots || self.instructions_in_flight() == (REORDER_BUFFER_SIZE as u32);
        while should_tick {
            self.tick_cycle::<false>();
            should_tick = self.instructions_in_flight() == (REORDER_BUFFER_SIZE as u32);
        }
    }

    #[inline(always)]
    fn wait_until_empty(&mut self) {
        #[cfg(all(test, feature = "logging"))]
        if self.instructions_in_flight() > 0 {
            log::debug!("wait_until_empty[{}]: starting fast forward!", self.cycles);
        }

        while self.instructions_in_flight() > 0 {
            if self.tracer.should_enable_fast_forward() {
                self.tick_cycle::<true>();
            } else {
                self.tick_cycle::<false>();
            }
        }
    }

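    /// Decodes one instruction into the reorder buffer, recording its latency,
    /// resource needs and register dependencies. `dst` is the register it
    /// writes (if any); `src1`/`src2` are the registers it reads.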
    fn dispatch_generic(&mut self, dst: Option<RawReg>, src1: Option<RawReg>, src2: Option<RawReg>, cost: InstCost) {
        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "dispatch[{}]: instruction={:?}, dst={:?}, src=[{:?}, {:?}], slots={}, latency={}, alu={}, load={}, store={}, mul={}, div={}",
            self.cycles,
            self.instructions,
            dst.map(|reg| reg.get()),
            src1.map(|reg| reg.get()),
            src2.map(|reg| reg.get()),
            cost.decode_slots,
            cost.latency,
            cost.alu_slots,
            cost.load_slots,
            cost.store_slots,
            cost.mul_slots,
            cost.div_slots,
        );

        debug_assert!(cost.latency >= 0);
        unsafe_avx2! { self.dispatch_generic_avx2(dst, src1, src2, cost) }
    }

    #[cfg_attr(all(feature = "simd", target_arch = "x86_64"), target_feature(enable = "avx2"))]
    fn dispatch_generic_avx2(&mut self, dst: Option<RawReg>, src1: Option<RawReg>, src2: Option<RawReg>, cost: InstCost) {
        let dst = dst.map(|dst| dst.get());
        let src1 = src1.map(|src1| src1.get());
        let src2 = src2.map(|src2| src2.get());

        self.tick_cycle_if_cannot_decode(cost.decode_slots);
        if T::SHOULD_CALL_ON_EVENT {
            self.tracer.on_event(self.cycles, self.instructions, EventKind::Decode);
        }

        let slot = self.reorder_buffer_tail;
        self.reorder_buffer_tail = (self.reorder_buffer_tail + 1) % (REORDER_BUFFER_SIZE as u32);
        let slot_mask = i8x32::zero().set_dynamic(cast(slot).truncate_to_u8(), cast(0xff_u8).to_signed());

        self.rob_cycles_remaining = self.rob_cycles_remaining.set_dynamic(slot as u8, cost.latency);
        self.rob_required_resources.as_slice_mut()[slot as usize] = cost.resources() as u16 as i16;

        // A dependency of -1 means the source register has no in-flight
        // writer; `base` is 0 in that case, which turns both the dependency
        // mask and the `rob_depended_by` updates into no-ops.
        let dependency_1: Option<u32> = src1
            .map(|src1| self.rob_entry_by_register.as_slice()[src1.to_usize()])
            .map(i32::from)
            .map(|x| cast(x).to_unsigned());
        let dependency_2: Option<u32> = src2
            .map(|src2| self.rob_entry_by_register.as_slice()[src2.to_usize()])
            .map(i32::from)
            .map(|x| cast(x).to_unsigned());
        match (dependency_1, dependency_2) {
            (Some(dependency_1), Some(dependency_2)) => {
                let base_1 = (dependency_1 >> 31) ^ 1;
                let base_2 = (dependency_2 >> 31) ^ 1;
                let dependencies_mask = cast(base_1.wrapping_shl(dependency_1) | base_2.wrapping_shl(dependency_2)).to_signed();
                self.rob_dependencies.as_slice_mut()[slot as usize] = dependencies_mask;
                self.rob_depended_by.as_slice_mut()[(dependency_1 * base_1) as usize] |= cast(base_1 << slot).to_signed();
                self.rob_depended_by.as_slice_mut()[(dependency_2 * base_2) as usize] |= cast(base_2 << slot).to_signed();
            }
            (Some(dependency), None) | (None, Some(dependency)) => {
                let base = (dependency >> 31) ^ 1;
                self.rob_dependencies.as_slice_mut()[slot as usize] = cast(base.wrapping_shl(dependency)).to_signed();
                self.rob_depended_by.as_slice_mut()[(dependency * base) as usize] |= cast(base.wrapping_shl(slot)).to_signed();
            }
            (None, None) => {}
        }

        if let Some(dst) = dst {
            let dst_mask: i16x32 = i16x32::splat(cast(cast(1_u32 << dst.to_u32()).truncate_to_u16()).to_signed());
            self.registers_written_by_rob_entry =
                self.registers_written_by_rob_entry.and_not(dst_mask) | (slot_mask.to_i16x32_sext() & dst_mask);
            self.rob_entry_by_register.as_slice_mut()[dst.to_usize()] = cast(cast(slot).truncate_to_u8()).to_signed();
        }

        self.rob_state = self.rob_state.set_dynamic(slot as u8, 1);
        if T::SHOULD_CALL_ON_EVENT {
            self.rob_instruction[cast(slot).to_usize()] = self.instructions;
        }

        self.instructions_in_flight += 1;
        self.decode_slots_remaining_this_cycle -= cost.decode_slots;
        self.instructions += 1;

        debug_assert_eq!(
            self.rob_state.simd_eq(i8x32::zero()).most_significant_bits().count_zeros(),
            self.instructions_in_flight
        );
    }

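    /// Register-to-register moves are eliminated at rename: no ROB entry is
    /// allocated; the destination register is simply re-pointed at whichever
    /// slot (if any) produces the source register.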
    fn dispatch_move_reg_avx2(&mut self, dst: RawReg, src: RawReg) {
        let dst = dst.get();
        let src = src.get();

        self.tick_cycle_if_cannot_decode(1);
        if T::SHOULD_CALL_ON_EVENT {
            self.tracer.on_event(self.cycles, self.instructions, EventKind::Decode);
        }

        let entry_by_register = self.rob_entry_by_register.as_slice_mut();
        let registers_written_by_rob_entry = self.registers_written_by_rob_entry.as_slice_mut();
        let old_slot = entry_by_register[dst.to_usize()];
        if old_slot != -1 {
            registers_written_by_rob_entry[old_slot as usize] &= !(1_i16 << dst.to_usize());
        }

        let new_slot = entry_by_register[src.to_usize()];
        if new_slot != -1 {
            registers_written_by_rob_entry[new_slot as usize] |= 1 << dst.to_usize();
        }

        entry_by_register[dst.to_usize()] = new_slot;
        self.decode_slots_remaining_this_cycle -= 1;
        self.instructions += 1;
    }

    fn dispatch_3op(&mut self, dst: RawReg, src1: RawReg, src2: RawReg, cost: InstCost) {
        self.dispatch_generic(Some(dst), Some(src1), Some(src2), cost);
    }

    fn dispatch_2op(&mut self, dst: RawReg, src: RawReg, cost: InstCost) {
        self.dispatch_generic(Some(dst), Some(src), None, cost);
    }

    fn dispatch_1op_dst(&mut self, dst: RawReg, cost: InstCost) {
        self.dispatch_generic(Some(dst), None, None, cost);
    }

    fn dispatch_finish(&mut self, latency: i8) {
        self.dispatch_generic(
            None,
            None,
            None,
            InstCost {
                latency,
                decode_slots: 1,
                ..EMPTY_COST
            },
        );

        self.wait_until_empty();
        self.finished = true;
    }

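    /// Load latency under the configured cache model: 4 cycles for an L1 hit,
    /// 25 for L2 and 37 for L3.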
    fn load_cost(&self) -> InstCost {
        const L1_HIT: i8 = 4;
        const L2_HIT: i8 = 25;
        const L3_HIT: i8 = 37;

        let latency = match self.cache_model {
            CacheModel::L1Hit => L1_HIT,
            CacheModel::L2Hit => L2_HIT,
            CacheModel::L3Hit => L3_HIT,
        };

        InstCost {
            latency,
            decode_slots: 1,
            alu_slots: 1,
            load_slots: 1,
            ..EMPTY_COST
        }
    }

    fn dispatch_indirect_load(&mut self, dst: RawReg, base: RawReg, _offset: u32, _size: u32) {
        self.dispatch_2op(dst, base, self.load_cost());
    }

    fn dispatch_load(&mut self, dst: RawReg, _offset: u32, _size: u32) {
        self.dispatch_1op_dst(dst, self.load_cost());
    }

    #[allow(clippy::unused_self)]
    fn store_cost(&self) -> InstCost {
        InstCost {
            latency: 25,
            decode_slots: 1,
            alu_slots: 1,
            store_slots: 1,
            ..EMPTY_COST
        }
    }

    fn dispatch_store(&mut self, src: RawReg, _offset: u32, _size: u32) {
        self.dispatch_generic(None, Some(src), None, self.store_cost());
    }

    fn dispatch_store_imm(&mut self, _offset: u32, _size: u32) {
        self.dispatch_generic(None, None, None, self.store_cost());
    }

    fn dispatch_store_indirect(&mut self, src: RawReg, base: RawReg, _offset: u32, _size: u32) {
        self.dispatch_generic(None, Some(src), Some(base), self.store_cost());
    }

    fn dispatch_store_imm_indirect(&mut self, base: RawReg, _offset: u32, _size: u32) {
        self.dispatch_generic(None, Some(base), None, self.store_cost());
    }

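    /// Heuristic branch cost: a branch is treated as predicted (1 cycle) when
    /// either the fall-through or the target lands on a `trap`/`unlikely`
    /// opcode (or runs off the end of the code), since the other direction is
    /// then statically the likely one; otherwise it is charged as a
    /// mispredict (20 cycles). `force_branch_is_cheap` overrides the heuristic.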
    fn get_branch_cost(&self, offset: u32, args_length: u32, jump_offset: u32) -> i8 {
        const BRANCH_PREDICTION_HIT_COST: i8 = 1;
        const BRANCH_PREDICTION_MISS_COST: i8 = 20;

        if let Some(is_hit) = self.force_branch_is_cheap {
            return if is_hit {
                BRANCH_PREDICTION_HIT_COST
            } else {
                BRANCH_PREDICTION_MISS_COST
            };
        }

        if self
            .code
            .get(cast(offset).to_usize() + cast(args_length).to_usize())
            .map(|&opcode| opcode == self.opcode_unlikely || opcode == self.opcode_trap)
            .unwrap_or(true)
        {
            return BRANCH_PREDICTION_HIT_COST;
        }

        if self
            .code
            .get(cast(jump_offset).to_usize())
            .map(|&opcode| opcode == self.opcode_unlikely || opcode == self.opcode_trap)
            .unwrap_or(true)
        {
            return BRANCH_PREDICTION_HIT_COST;
        }

        BRANCH_PREDICTION_MISS_COST
    }

    fn dispatch_branch(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, jump_offset: u32) {
        self.dispatch_generic(
            None,
            Some(s1),
            Some(s2),
            InstCost {
                latency: self.get_branch_cost(offset, args_length, jump_offset),
                decode_slots: 1,
                alu_slots: 1,
                ..EMPTY_COST
            },
        );
        self.wait_until_empty();
        self.finished = true;
    }

    fn dispatch_branch_imm(&mut self, offset: u32, args_length: u32, s: RawReg, jump_offset: u32) {
        self.dispatch_generic(
            None,
            Some(s),
            None,
            InstCost {
                latency: self.get_branch_cost(offset, args_length, jump_offset),
                decode_slots: 1,
                alu_slots: 1,
                ..EMPTY_COST
            },
        );
        self.wait_until_empty();
        self.finished = true;
    }

    fn dispatch_trivial_2op_1c(&mut self, d: RawReg, s: RawReg) {
        self.dispatch_2op(
            d,
            s,
            InstCost {
                latency: 1,
                decode_slots: 1,
                alu_slots: 1,
                ..EMPTY_COST
            },
        );
    }

    fn dispatch_trivial_2op_2c(&mut self, d: RawReg, s: RawReg) {
        self.dispatch_2op(
            d,
            s,
            InstCost {
                latency: 2,
                decode_slots: 1,
                alu_slots: 2,
                ..EMPTY_COST
            },
        );
    }

    fn dispatch_simple_alu_2op(&mut self, d: RawReg, s: RawReg) {
        self.dispatch_2op(
            d,
            s,
            InstCost {
                latency: 1,
                decode_slots: 1 + u32::from(d.get() != s.get()),
                alu_slots: 1,
                ..EMPTY_COST
            },
        );
    }

    fn dispatch_simple_alu_2op_32bit(&mut self, d: RawReg, s: RawReg) {
        self.dispatch_2op(
            d,
            s,
            InstCost {
                latency: 1 + i8::from(B::BITNESS == Bitness::B64),
                decode_slots: 1 + u32::from(d.get() != s.get()) + u32::from(B::BITNESS == Bitness::B64),
                alu_slots: 1,
                ..EMPTY_COST
            },
        );
    }

    fn dispatch_simple_alu_3op(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
        self.dispatch_3op(
            d,
            s1,
            s2,
            InstCost {
                latency: 1,
                decode_slots: 1 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())),
                alu_slots: 1,
                ..EMPTY_COST
            },
        );
    }

    fn dispatch_simple_alu_3op_32(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
        self.dispatch_3op(
            d,
            s1,
            s2,
            InstCost {
                latency: 1 + i8::from(B::BITNESS == Bitness::B64),
                decode_slots: 1 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())) + u32::from(B::BITNESS == Bitness::B64),
                alu_slots: 1,
                ..EMPTY_COST
            },
        );
    }

    fn dispatch_shift(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
        self.dispatch_3op(
            d,
            s1,
            s2,
            InstCost {
                latency: 1,
                decode_slots: 2 + u32::from(d.get() != s1.get()),
                alu_slots: 1,
                ..EMPTY_COST
            },
        )
    }

    fn dispatch_shift_32(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
        self.dispatch_3op(
            d,
            s1,
            s2,
            InstCost {
                latency: 1 + i8::from(B::BITNESS == Bitness::B64),
                decode_slots: 2 + u32::from(d.get() != s1.get()) + u32::from(B::BITNESS == Bitness::B64),
                alu_slots: 1,
                ..EMPTY_COST
            },
        )
    }

    fn dispatch_shift_imm_alt(&mut self, d: RawReg, s: RawReg) {
        self.dispatch_2op(
            d,
            s,
            InstCost {
                latency: 1,
                decode_slots: 3,
                alu_slots: 1,
                ..EMPTY_COST
            },
        )
    }

    fn dispatch_shift_imm_alt_32(&mut self, d: RawReg, s: RawReg) {
        self.dispatch_2op(
            d,
            s,
            InstCost {
                latency: 2,
                decode_slots: 4,
                alu_slots: 1,
                ..EMPTY_COST
            },
        )
    }

    fn dispatch_compare(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
        self.dispatch_3op(
            d,
            s1,
            s2,
            InstCost {
                latency: 3,
                decode_slots: 3,
                alu_slots: 1,
                ..EMPTY_COST
            },
        )
    }

    fn dispatch_compare_imm(&mut self, d: RawReg, s: RawReg) {
        self.dispatch_2op(
            d,
            s,
            InstCost {
                latency: 3,
                decode_slots: 3,
                alu_slots: 1,
                ..EMPTY_COST
            },
        )
    }

    fn dispatch_cmov(&mut self, d: RawReg, s: RawReg, c: RawReg) {
        self.dispatch_3op(
            d,
            s,
            c,
            InstCost {
                latency: 2,
                decode_slots: 2,
                alu_slots: 1,
                ..EMPTY_COST
            },
        )
    }

    fn dispatch_cmov_imm(&mut self, d: RawReg, c: RawReg) {
        self.dispatch_2op(
            d,
            c,
            InstCost {
                latency: 2,
                decode_slots: 3,
                alu_slots: 1,
                ..EMPTY_COST
            },
        )
    }

    fn dispatch_min_max(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
        self.dispatch_3op(
            d,
            s1,
            s2,
            InstCost {
                latency: 3,
                decode_slots: 2 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())),
                alu_slots: 1,
                ..EMPTY_COST
            },
        )
    }

    fn dispatch_division(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
        self.dispatch_3op(
            d,
            s1,
            s2,
            InstCost {
                latency: 60,
                decode_slots: 4,
                alu_slots: 1,
                div_slots: 1,
                ..EMPTY_COST
            },
        )
    }
}

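// Gas metering hook: once a block-terminating instruction has drained the
// pipeline, `take_block_cost` converts the simulated cycle count into a gas
// cost (minus `GAS_COST_SLACK`, floored at 1) and resets the simulator for
// the next basic block.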
impl<'a, B, T> GasVisitorT for Simulator<'a, B, T>
where
    B: BitnessT,
    T: Tracer,
{
    #[inline]
    fn take_block_cost(&mut self) -> Option<u32> {
        if (self.instructions_in_flight() == 0) & self.finished {
            let cycles = self.cycles;
            self.clear();

            let cycles = cast((cast(cycles).to_signed() - GAS_COST_SLACK).max(1)).to_unsigned();
            Some(cycles)
        } else {
            None
        }
    }

    fn is_at_start_of_basic_block(&self) -> bool {
        self.instructions == 0
    }
}

impl<'a, B, T> ParsingVisitor for Simulator<'a, B, T>
where
    B: BitnessT,
    T: Tracer,
{
    type ReturnTy = ();

    #[inline(always)]
    fn xor(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op(d, s1, s2)
    }

    #[inline(always)]
    fn and(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op(d, s1, s2)
    }

    #[inline(always)]
    fn or(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op(d, s1, s2)
    }

    #[inline(always)]
    fn add_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op(d, s1, s2)
    }

    #[inline(always)]
    fn sub_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op(d, s1, s2)
    }

    #[inline(always)]
    fn add_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op_32(d, s1, s2)
    }

    #[inline(always)]
    fn sub_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op_32(d, s1, s2)
    }

    #[inline(always)]
    fn xor_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _imm: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s)
    }

    #[inline(always)]
    fn and_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _imm: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s)
    }

    #[inline(always)]
    fn or_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _imm: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s)
    }

    #[inline(always)]
    fn add_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _imm: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s)
    }

    #[inline(always)]
    fn shift_logical_right_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s1)
    }

    #[inline(always)]
    fn shift_arithmetic_right_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s1)
    }

    #[inline(always)]
    fn shift_logical_left_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s1)
    }

    #[inline(always)]
    fn rotate_right_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _c: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s1)
    }

    #[inline(always)]
    fn reverse_byte(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s)
    }

    #[inline(always)]
    fn add_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _imm: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op_32bit(d, s)
    }

    #[inline(always)]
    fn shift_logical_right_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op_32bit(d, s1)
    }

    #[inline(always)]
    fn shift_arithmetic_right_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op_32bit(d, s1)
    }

    #[inline(always)]
    fn shift_logical_left_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op_32bit(d, s1)
    }

    #[inline(always)]
    fn rotate_right_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _c: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op_32bit(d, s1)
    }

    #[inline(always)]
    fn count_leading_zero_bits_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }

    #[inline(always)]
    fn count_leading_zero_bits_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }

    #[inline(always)]
    fn count_set_bits_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }

    #[inline(always)]
    fn count_set_bits_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }

    #[inline(always)]
    fn sign_extend_8(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }

    #[inline(always)]
    fn sign_extend_16(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }

    #[inline(always)]
    fn zero_extend_16(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }

    #[inline(always)]
    fn count_trailing_zero_bits_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_2c(d, s)
    }

    #[inline(always)]
    fn count_trailing_zero_bits_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_2c(d, s)
    }

    #[inline(always)]
    fn shift_logical_right_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift(d, s1, s2)
    }

    #[inline(always)]
    fn shift_arithmetic_right_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift(d, s1, s2)
    }

    #[inline(always)]
    fn shift_logical_left_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift(d, s1, s2)
    }

    #[inline(always)]
    fn rotate_left_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift(d, s1, s2)
    }

    #[inline(always)]
    fn rotate_right_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift(d, s1, s2)
    }

    #[inline(always)]
    fn shift_logical_right_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift_32(d, s1, s2)
    }

    #[inline(always)]
    fn shift_arithmetic_right_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift_32(d, s1, s2)
    }

    #[inline(always)]
    fn shift_logical_left_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift_32(d, s1, s2)
    }

    #[inline(always)]
    fn rotate_left_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift_32(d, s1, s2)
    }

    #[inline(always)]
    fn rotate_right_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift_32(d, s1, s2)
    }

    #[inline(always)]
    fn shift_logical_right_imm_alt_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt(d, s2)
    }

    #[inline(always)]
    fn shift_arithmetic_right_imm_alt_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt(d, s2)
    }

    #[inline(always)]
    fn shift_logical_left_imm_alt_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt(d, s2)
    }

    #[inline(always)]
    fn rotate_right_imm_alt_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _c: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt(d, s)
    }

    #[inline(always)]
    fn shift_logical_right_imm_alt_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt_32(d, s2)
    }

    #[inline(always)]
    fn shift_arithmetic_right_imm_alt_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt_32(d, s2)
    }

    #[inline(always)]
    fn shift_logical_left_imm_alt_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt_32(d, s2)
    }

    #[inline(always)]
    fn rotate_right_imm_alt_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _c: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt_32(d, s)
    }

    #[inline(always)]
    fn set_less_than_unsigned(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_compare(d, s1, s2)
    }

    #[inline(always)]
    fn set_less_than_signed(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_compare(d, s1, s2)
    }

    #[inline(always)]
    fn set_less_than_unsigned_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_compare_imm(d, s1)
    }

    #[inline(always)]
    fn set_less_than_signed_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_compare_imm(d, s1)
    }

    #[inline(always)]
    fn set_greater_than_unsigned_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_compare_imm(d, s1)
    }

    #[inline(always)]
    fn set_greater_than_signed_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_compare_imm(d, s1)
    }

    #[inline(always)]
    fn cmov_if_zero(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, c: RawReg) -> Self::ReturnTy {
        self.dispatch_cmov(d, s, c)
    }

    #[inline(always)]
    fn cmov_if_not_zero(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, c: RawReg) -> Self::ReturnTy {
        self.dispatch_cmov(d, s, c)
    }

    #[inline(always)]
    fn cmov_if_zero_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, c: RawReg, _s: u32) -> Self::ReturnTy {
        self.dispatch_cmov_imm(d, c)
    }

    #[inline(always)]
    fn cmov_if_not_zero_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, c: RawReg, _s: u32) -> Self::ReturnTy {
        self.dispatch_cmov_imm(d, c)
    }

    #[inline(always)]
    fn maximum(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_min_max(d, s1, s2)
    }

    #[inline(always)]
    fn maximum_unsigned(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_min_max(d, s1, s2)
    }

    #[inline(always)]
    fn minimum(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_min_max(d, s1, s2)
    }

    #[inline(always)]
    fn minimum_unsigned(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_min_max(d, s1, s2)
    }

    #[inline(always)]
    fn load_indirect_u8(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 1)
    }

    #[inline(always)]
    fn load_indirect_i8(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 1)
    }

    #[inline(always)]
    fn load_indirect_u16(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 2)
    }

    #[inline(always)]
    fn load_indirect_i16(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 2)
    }

    #[inline(always)]
    fn load_indirect_u32(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 4)
    }

    #[inline(always)]
    fn load_indirect_i32(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 4)
    }

    #[inline(always)]
    fn load_indirect_u64(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 8)
    }

    #[inline(always)]
    fn load_u8(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 1)
    }

    #[inline(always)]
    fn load_i8(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 1)
    }

    #[inline(always)]
    fn load_u16(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 2)
    }

    #[inline(always)]
    fn load_i16(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 2)
    }

    #[inline(always)]
    fn load_u32(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 4)
    }

    #[inline(always)]
    fn load_i32(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 4)
    }

    #[inline(always)]
    fn load_u64(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 8)
    }

    #[inline(always)]
    fn store_imm_indirect_u8(&mut self, _offset: u32, _args_length: u32, base: RawReg, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm_indirect(base, offset, 1)
    }

    #[inline(always)]
    fn store_imm_indirect_u16(&mut self, _offset: u32, _args_length: u32, base: RawReg, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm_indirect(base, offset, 2)
    }

    #[inline(always)]
    fn store_imm_indirect_u32(&mut self, _offset: u32, _args_length: u32, base: RawReg, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm_indirect(base, offset, 4)
    }

    #[inline(always)]
    fn store_imm_indirect_u64(&mut self, _offset: u32, _args_length: u32, base: RawReg, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm_indirect(base, offset, 8)
    }

    #[inline(always)]
    fn store_indirect_u8(&mut self, _offset: u32, _args_length: u32, src: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store_indirect(src, base, offset, 1)
    }

    #[inline(always)]
    fn store_indirect_u16(&mut self, _offset: u32, _args_length: u32, src: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store_indirect(src, base, offset, 2)
    }

    #[inline(always)]
    fn store_indirect_u32(&mut self, _offset: u32, _args_length: u32, src: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store_indirect(src, base, offset, 4)
    }

    #[inline(always)]
    fn store_indirect_u64(&mut self, _offset: u32, _args_length: u32, src: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store_indirect(src, base, offset, 8)
    }

    #[inline(always)]
    fn store_imm_u8(&mut self, _offset: u32, _args_length: u32, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm(offset, 1)
    }

    #[inline(always)]
    fn store_imm_u16(&mut self, _offset: u32, _args_length: u32, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm(offset, 2)
    }

    #[inline(always)]
    fn store_imm_u32(&mut self, _offset: u32, _args_length: u32, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm(offset, 4)
    }

    #[inline(always)]
    fn store_imm_u64(&mut self, _offset: u32, _args_length: u32, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm(offset, 8)
    }

    #[inline(always)]
    fn store_u8(&mut self, _offset: u32, _args_length: u32, src: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store(src, offset, 1)
    }

    #[inline(always)]
    fn store_u16(&mut self, _offset: u32, _args_length: u32, src: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store(src, offset, 2)
    }

    #[inline(always)]
    fn store_u32(&mut self, _offset: u32, _args_length: u32, src: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store(src, offset, 4)
    }

    #[inline(always)]
    fn store_u64(&mut self, _offset: u32, _args_length: u32, src: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store(src, offset, 8)
    }

    #[inline(always)]
    fn branch_less_unsigned(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }

    #[inline(always)]
    fn branch_less_signed(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }

    #[inline(always)]
    fn branch_greater_or_equal_unsigned(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }

    #[inline(always)]
    fn branch_greater_or_equal_signed(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }

    #[inline(always)]
    fn branch_eq(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }

    #[inline(always)]
    fn branch_not_eq(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }

    #[inline(always)]
    fn branch_eq_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }

    #[inline(always)]
    fn branch_not_eq_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }

    #[inline(always)]
    fn branch_less_unsigned_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }

    #[inline(always)]
    fn branch_less_signed_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }

    #[inline(always)]
    fn branch_greater_or_equal_unsigned_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }

    #[inline(always)]
    fn branch_greater_or_equal_signed_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }

    #[inline(always)]
    fn branch_less_or_equal_unsigned_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }

    #[inline(always)]
    fn branch_less_or_equal_signed_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }

    #[inline(always)]
    fn branch_greater_unsigned_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }

    #[inline(always)]
    fn branch_greater_signed_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }

    #[inline(always)]
    fn div_unsigned_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }

    #[inline(always)]
    fn div_signed_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }

    #[inline(always)]
    fn rem_unsigned_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }

    #[inline(always)]
    fn rem_signed_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }

    #[inline(always)]
    fn div_unsigned_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }

    #[inline(always)]
    fn div_signed_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }

    #[inline(always)]
    fn rem_unsigned_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }

    #[inline(always)]
    fn rem_signed_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }

    #[inline(always)]
    fn and_inverted(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_3op(
            d,
            s1,
            s2,
            InstCost {
                latency: 2,
                decode_slots: 3,
                alu_slots: 1,
                ..EMPTY_COST
            },
        )
    }

    #[inline(always)]
    fn or_inverted(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_3op(
            d,
            s1,
            s2,
            InstCost {
                latency: 2,
                decode_slots: 3,
                alu_slots: 1,
                ..EMPTY_COST
            },
        )
    }

    #[inline(always)]
    fn xnor(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_3op(
1922 d,
1923 s1,
1924 s2,
1925 InstCost {
1926 latency: 2,
1927 decode_slots: 2 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())),
1928 alu_slots: 1,
1929 ..EMPTY_COST
1930 },
1931 );
1932 }
1933
1934 #[inline(always)]
1935 fn negate_and_add_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
1936 self.dispatch_2op(
1937 d,
1938 s1,
1939 InstCost {
1940 latency: 2,
1941 decode_slots: 3,
1942 alu_slots: 1,
1943 ..EMPTY_COST
1944 },
1945 )
1946 }
1947
1948 #[inline(always)]
1949 fn negate_and_add_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
1950 self.dispatch_2op(
1951 d,
1952 s1,
1953 InstCost {
1954 latency: 3,
1955 decode_slots: 4,
1956 alu_slots: 1,
1957 ..EMPTY_COST
1958 },
1959 )
1960 }
1961
1962 #[inline(always)]
1963 fn move_reg(&mut self, _offset: u32, _args_length: u32, dst: RawReg, src: RawReg) -> Self::ReturnTy {
1964 self.dispatch_move_reg_avx2(dst, src);
1965 }
1966
1967 #[inline(always)]
1968 fn load_imm(&mut self, _offset: u32, _args_length: u32, dst: RawReg, _value: u32) -> Self::ReturnTy {
1969 self.dispatch_1op_dst(
1970 dst,
1971 InstCost {
1972 latency: 1,
1973 decode_slots: 1,
1974 ..EMPTY_COST
1975 },
1976 )
1977 }
1978
1979 #[inline(always)]
1980 fn load_imm64(&mut self, _offset: u32, _args_length: u32, dst: RawReg, _value: u64) -> Self::ReturnTy {
1981 self.dispatch_1op_dst(
1982 dst,
1983 InstCost {
1984 latency: 1,
1985 decode_slots: 2,
1986 ..EMPTY_COST
1987 },
1988 );
1989 }
1990
1991 #[inline(always)]
1992 fn mul_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
1993 self.dispatch_3op(
1994 d,
1995 s1,
1996 s2,
1997 InstCost {
1998 latency: 4,
1999 decode_slots: 2 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())),
2000 alu_slots: 1,
2001 mul_slots: 1,
2002 ..EMPTY_COST
2003 },
2004 )
2005 }
2006
2007 #[inline(always)]
2008 fn mul_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
2009 self.dispatch_3op(
2010 d,
2011 s1,
2012 s2,
2013 InstCost {
2014 latency: 3,
2015 decode_slots: 1 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())),
2016 alu_slots: 1,
2017 mul_slots: 1,
2018 ..EMPTY_COST
2019 },
2020 )
2021 }
2022
2023 #[inline(always)]
2024 fn mul_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
2025 self.dispatch_2op(
2026 d,
2027 s1,
2028 InstCost {
2029 latency: 4,
2030 decode_slots: 2 + u32::from(d.get() != s1.get()),
2031 alu_slots: 1,
2032 mul_slots: 1,
2033 ..EMPTY_COST
2034 },
2035 )
2036 }
2037
2038 #[inline(always)]
2039 fn mul_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
2040 self.dispatch_2op(
2041 d,
2042 s1,
2043 InstCost {
2044 latency: 3,
2045 decode_slots: 1 + u32::from(d.get() != s1.get()),
2046 alu_slots: 1,
2047 mul_slots: 1,
2048 ..EMPTY_COST
2049 },
2050 )
2051 }
2052
2053 #[inline(always)]
2054 fn mul_upper_signed_signed(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
2055 self.dispatch_3op(
2056 d,
2057 s1,
2058 s2,
2059 InstCost {
2060 latency: 4,
2061 decode_slots: 4,
2062 alu_slots: 1,
2063 mul_slots: 1,
2064 ..EMPTY_COST
2065 },
2066 )
2067 }
2068
2069 #[inline(always)]
2070 fn mul_upper_unsigned_unsigned(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
2071 self.dispatch_3op(
2072 d,
2073 s1,
2074 s2,
2075 InstCost {
2076 latency: 4,
2077 decode_slots: 4,
2078 alu_slots: 1,
2079 mul_slots: 1,
2080 ..EMPTY_COST
2081 },
2082 )
2083 }
2084
2085 #[inline(always)]
2086 fn mul_upper_signed_unsigned(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
2087 self.dispatch_3op(
2088 d,
2089 s1,
2090 s2,
2091 InstCost {
2092 latency: 6,
2093 decode_slots: 4,
2094 alu_slots: 1,
2095 mul_slots: 1,
2096 ..EMPTY_COST
2097 },
2098 )
2099 }
2100
2101 #[cold]
2104 fn invalid(&mut self, _offset: u32, _args_length: u32) -> Self::ReturnTy {
2105 self.dispatch_finish(2);
2106 }
2107
2108 #[inline(always)]
2109 fn trap(&mut self, _offset: u32, _args_length: u32) -> Self::ReturnTy {
2110 self.dispatch_finish(2);
2111 }
2112
2113 #[inline(always)]
2114 fn fallthrough(&mut self, _offset: u32, _args_length: u32) -> Self::ReturnTy {
2115 self.dispatch_finish(2);
2116 }
2117
2118 #[inline(always)]
2119 fn unlikely(&mut self, _offset: u32, _args_length: u32) -> Self::ReturnTy {
2120 self.dispatch_generic(
2121 None,
2122 None,
2123 None,
2124 InstCost {
2125 latency: 40,
2126 decode_slots: 1,
2127 ..EMPTY_COST
2128 },
2129 );
2130 }
2131
2132 #[inline(always)]
2133 fn jump(&mut self, _offset: u32, _args_length: u32, _target: u32) -> Self::ReturnTy {
2134 self.dispatch_finish(15);
2135 }
2136
2137 #[inline(always)]
2138 fn load_imm_and_jump(&mut self, _offset: u32, _args_length: u32, _ra: RawReg, _value: u32, _target: u32) -> Self::ReturnTy {
2139 self.dispatch_finish(15);
2140 }
2141
2142 #[inline(always)]
2143 fn jump_indirect(&mut self, _offset: u32, _args_length: u32, base: RawReg, _base_offset: u32) -> Self::ReturnTy {
2144 self.dispatch_generic(
2145 None,
2146 Some(base),
2147 None,
2148 InstCost {
2149 latency: 22,
2150 decode_slots: 1,
2151 ..EMPTY_COST
2152 },
2153 );
2154 self.wait_until_empty();
2155 self.finished = true;
2156 }
2157
2158 #[inline(always)]
2159 fn load_imm_and_jump_indirect(
2160 &mut self,
2161 _offset: u32,
2162 _args_length: u32,
2163 _ra: RawReg,
2164 base: RawReg,
2165 _value: u32,
2166 _base_offset: u32,
2167 ) -> Self::ReturnTy {
2168 self.dispatch_generic(
2169 None,
2170 Some(base),
2171 None,
2172 InstCost {
2173 latency: 22,
2174 decode_slots: 1,
2175 ..EMPTY_COST
2176 },
2177 );
2178 self.wait_until_empty();
2179 self.finished = true;
2180 }
2181
2182 #[inline(always)]
2185 fn ecalli(&mut self, _offset: u32, _args_length: u32, _imm: u32) -> Self::ReturnTy {
2186 self.dispatch_generic(
2187 None,
2188 None,
2189 None,
2190 InstCost {
2191 latency: 100,
2192 decode_slots: 4,
2193 alu_slots: 1,
2194 ..EMPTY_COST
2195 },
2196 );
2197 }
2198
2199 #[inline(always)]
2200 fn sbrk(&mut self, _offset: u32, _args_length: u32, dst: RawReg, src: RawReg) -> Self::ReturnTy {
2201 self.dispatch_2op(
2203 dst,
2204 src,
2205 InstCost {
2206 latency: 100,
2207 decode_slots: 4,
2208 alu_slots: 1,
2209 ..EMPTY_COST
2210 },
2211 );
2212 }
2213
2214 #[inline(always)]
2215 fn memset(&mut self, _offset: u32, _args_length: u32) -> Self::ReturnTy {
2216 self.dispatch_generic(
2218 None,
2219 None,
2220 None,
2221 InstCost {
2222 latency: 100,
2223 decode_slots: 4,
2224 alu_slots: 1,
2225 ..EMPTY_COST
2226 },
2227 )
2228 }
2229}
2230
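/// Configuration for rendering an instruction timeline with [`timeline_for_instructions`].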
#[derive(Clone)]
#[non_exhaustive]
pub struct TimelineConfig<'a> {
    /// Whether the simulator may fast-forward over provably uneventful cycles
    /// instead of simulating every cycle one by one.
    pub should_enable_fast_forward: bool,
    /// The format used to disassemble each instruction next to its timeline.
    pub instruction_format: InstructionFormat<'a>,
}

impl<'a> Default for TimelineConfig<'a> {
    fn default() -> Self {
        TimelineConfig {
            should_enable_fast_forward: false,
            instruction_format: InstructionFormat {
                is_64_bit: true,
                ..InstructionFormat::default()
            },
        }
    }
}

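/// Simulates a single basic block and renders an `llvm-mca`-style timeline view for it.
///
/// Each line of the output is the per-cycle history of one instruction followed by its
/// disassembly. The event characters follow the usual timeline-view legend: `D` = decoded,
/// `=` = waiting to execute, `e` = executing, `E` = finished executing, `-` = waiting to
/// retire, `R` = retired, `.` = idle.
///
/// Returns the rendered timeline together with the gas cost computed for the block.
///
/// A minimal usage sketch (assuming an already-parsed `blob`), mirroring the test helper below:
///
/// ```ignore
/// let instructions: Vec<_> = blob.instructions().collect();
/// let (timeline, block_cost) = timeline_for_instructions(
///     blob.code(),
///     InstructionSetKind::Latest64,
///     CacheModel::L1Hit,
///     &instructions,
///     TimelineConfig::default(),
/// );
/// println!("{timeline}");
/// ```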
pub fn timeline_for_instructions(
    code: &[u8],
    isa: InstructionSetKind,
    cache_model: CacheModel,
    instructions: &[crate::program::ParsedInstruction],
    config: TimelineConfig,
) -> (String, u32) {
    use alloc::collections::BTreeMap;

    struct TimelineTracer<'a> {
        should_enable_fast_forward: bool,
        timeline: &'a mut BTreeMap<(u32, u32), EventKind>,
    }

    impl<'a> Tracer for TimelineTracer<'a> {
        const SHOULD_CALL_ON_EVENT: bool = true;

        fn should_enable_fast_forward(&self) -> bool {
            self.should_enable_fast_forward
        }

        fn on_event(&mut self, cycle: u32, instruction: u32, event: EventKind) {
            match self.timeline.entry((cycle, instruction)) {
                alloc::collections::btree_map::Entry::Vacant(entry) => {
                    #[cfg(all(test, feature = "logging"))]
                    log::debug!(
                        "on_event[{cycle}]: instruction={instruction} '{}' (event={event:?})",
                        char::from(event)
                    );
                    entry.insert(event);
                }
                alloc::collections::btree_map::Entry::Occupied(entry) => {
                    panic!(
                        "duplicate timeline update: cycle={cycle} instruction={instruction} old_event={:?} new_event={event:?}",
                        entry.get()
                    );
                }
            }
        }
    }

    // Only simulate a single basic block: stop at the first instruction which starts a new one.
    let count = instructions
        .iter()
        .take_while(|inst| !inst.kind.opcode().starts_new_basic_block())
        .count();

    let mut instructions = instructions[..(count + 1).min(instructions.len())].to_vec();
    if !instructions
        .last()
        .map(|instruction| instruction.kind.opcode().starts_new_basic_block())
        .unwrap_or(false)
    {
        // Make sure the block is always terminated.
        let next_pc = instructions.last().map(|instruction| instruction.next_offset.0).unwrap_or(0);
        instructions.push(crate::program::ParsedInstruction {
            kind: crate::program::Instruction::invalid,
            offset: crate::program::ProgramCounter(next_pc),
            next_offset: crate::program::ProgramCounter(next_pc + 1),
        });
    }

    let mut timeline_map = BTreeMap::new();
    let mut sim = Simulator::<B64, _>::new(
        code,
        isa,
        cache_model,
        TimelineTracer {
            should_enable_fast_forward: config.should_enable_fast_forward,
            timeline: &mut timeline_map,
        },
    );

    for &instruction in &instructions {
        assert!(sim.take_block_cost().is_none());
        instruction.visit_parsing(&mut sim);
    }

    let total_cycles = cast(sim.cycles).to_usize();
    let block_cost = sim.take_block_cost().unwrap();
    #[cfg(all(test, feature = "logging"))]
    log::debug!("Total cycles: {total_cycles}");

    #[cfg(all(test, feature = "logging"))]
    log::debug!("Block cost: {block_cost}");

    // Rasterize the sparse event map into one row of cells per instruction.
    let mut timeline = vec!['.'; total_cycles * instructions.len()];
    for ((cycle, instruction), event) in timeline_map {
        let index = instruction as usize * total_cycles + cycle as usize;
        timeline[index] = char::from(event);
    }

    let mut timeline_s = String::new();
    for (nth_instruction, instruction) in instructions.iter().enumerate() {
        use core::fmt::Write;

        let line = &timeline[nth_instruction * total_cycles..(nth_instruction + 1) * total_cycles];
        timeline_s.extend(line.iter().copied());
        timeline_s.push(' ');
        writeln!(&mut timeline_s, "{}", instruction.display(&config.instruction_format)).unwrap();
    }

    if config.should_enable_fast_forward {
        // When fast-forwarding the simulator skips over uneventful cycles, which leaves
        // '.' holes in the middle of an instruction's lifetime. Smear the last seen event
        // character over those holes (but never past a retire 'R', nor after a lone decode
        // 'D') so the output matches a full cycle-by-cycle simulation.
        let mut timeline_new = String::with_capacity(timeline_s.len());
        let mut is_in_cycles = true;
        let mut last = '.';
        for mut ch in timeline_s.chars() {
            if ch == ' ' {
                // Everything after the first space is the disassembly; leave it untouched.
                is_in_cycles = false;
            } else if ch == '\n' {
                is_in_cycles = true;
                last = '.';
            } else if ch == '.' {
                if last != 'R' && last != 'D' && is_in_cycles {
                    ch = last;
                }
            } else {
                last = ch;
            }
            timeline_new.push(ch);
        }
        timeline_s = timeline_new;
    }

    (timeline_s, block_cost)
}

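/// Computes the gas cost of a block consisting of nothing but a single `trap` instruction.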
pub fn trap_cost(isa: InstructionSetKind, cache_model: CacheModel) -> u32 {
    let mut sim = Simulator::<B64, _>::new(&[], isa, cache_model, ());
    crate::program::ParsedInstruction {
        kind: crate::program::Instruction::trap,
        offset: crate::program::ProgramCounter(0),
        next_offset: crate::program::ProgramCounter(0),
    }
    .visit_parsing(&mut sim);
    sim.take_block_cost().unwrap()
}

#[cfg(test)]
mod tests {
    use alloc::string::String;
    use alloc::vec::Vec;

    use super::{timeline_for_instructions, CacheModel, TimelineConfig};
    use crate::assembler::assemble;
    use crate::program::{InstructionSetKind, ProgramBlob};

    fn test_config() -> CacheModel {
        CacheModel::L1Hit
    }

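    // Assembles `program`, simulates it, and compares the rendered timeline against
    // `expected_timeline`; blank lines and leading whitespace in the expectation are
    // ignored, and the expected block cost is derived from the width of the widest
    // timeline line. Everything is then re-run with fast-forward enabled, which must
    // produce the exact same timeline and cost.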
    fn assert_timeline(config: CacheModel, program: &str, expected_timeline: &str) {
        use crate::cast::cast;

        let _ = env_logger::try_init();

        let program = assemble(Some(InstructionSetKind::Latest64), program).unwrap();
        let blob = ProgramBlob::parse(program.into()).unwrap();
        let instructions: Vec<_> = blob.instructions().collect();

        let (timeline_s, cycles) = timeline_for_instructions(
            blob.code(),
            InstructionSetKind::Latest64,
            config,
            &instructions,
            TimelineConfig::default(),
        );
        let mut expected_timeline_s = String::new();
        let mut expected_cycles = 0;
        for line in expected_timeline.lines() {
            let line = line.trim();
            if line.is_empty() {
                continue;
            }
            expected_timeline_s.push_str(line);
            expected_timeline_s.push('\n');

            expected_cycles = expected_cycles.max(line.split(' ').next().unwrap().len() as u32);
        }

        if timeline_s != expected_timeline_s {
            panic!("Timeline mismatch!\n\nExpected timeline:\n{expected_timeline_s}\nActual timeline:\n{timeline_s}");
        }

        // The reported block cost is expected to be exactly three cycles less than the
        // timeline's width (the 3 here mirrors GAS_COST_SLACK).
        let expected_cycles = cast(expected_cycles).to_signed() - 3;
        assert_eq!(cast(cycles).to_signed(), expected_cycles);

        #[cfg(feature = "logging")]
        log::debug!("Rerunning with fast-forward enabled...");

        let timeline_config = TimelineConfig {
            should_enable_fast_forward: true,
            ..TimelineConfig::default()
        };
        let (timeline_ff_s, cycles_ff) =
            timeline_for_instructions(blob.code(), InstructionSetKind::Latest64, config, &instructions, timeline_config);
        assert_eq!(cycles_ff, cycles);
        if timeline_ff_s != expected_timeline_s {
            panic!("Timeline mismatch for fast-forward!\n\nExpected timeline:\n{expected_timeline_s}\nActual timeline:\n{timeline_ff_s}");
        }
    }

    #[test]
    fn test_parallel_simple() {
        assert_timeline(
            test_config(),
            "
            a0 = a1 + a2
            a1 = a1 + a2
            trap
            ",
            "
            DeER. a0 = a1 + a2
            DeER. a1 = a1 + a2
            DeeER trap
            ",
        );
    }

    #[test]
    fn test_sequential_simple() {
        assert_timeline(
            test_config(),
            "
            a0 = a1 + a2
            a1 = a0 + a2
            trap
            ",
            "
            DeER.. a0 = a1 + a2
            D=eER. a1 = a0 + a2
            .DeeER trap
            ",
        );
    }

    #[test]
    fn test_sequential_decode_limits() {
        assert_timeline(
            test_config(),
            "
            a0 = 0x12345678aabbccdd
            a1 = 0x12345678aabbccdd
            a2 = 0x12345678aabbccdd
            a3 = 0x12345678aabbccdd
            trap
            ",
            "
            DeER... a0 = 0x12345678aabbccdd
            DeER... a1 = 0x12345678aabbccdd
            .DeER.. a2 = 0x12345678aabbccdd
            .DeER.. a3 = 0x12345678aabbccdd
            ..DeeER trap
            ",
        );
    }

    #[test]
    fn test_resource_limits_mul() {
        assert_timeline(
            test_config(),
            "
            a0 = a1 * a2
            a1 = a3 * a4
            trap
            ",
            "
            DeeeER... a0 = a1 * a2
            D===eeeER a1 = a3 * a4
            .DeeE---R trap
            ",
        );
    }

    #[test]
    fn test_mul_with_dep() {
        assert_timeline(
            test_config(),
            "
            a0 = a1 + a2
            a4 = a0 * a3
            trap
            ",
            "
            DeER... a0 = a1 + a2
            D=eeeER a4 = a0 * a3
            .DeeE-R trap
            ",
        );
    }

    #[test]
    fn test_register_move() {
        assert_timeline(
            test_config(),
            "
            s0 = 1
            a0 = s0
            a1 = a0 + 1
            trap
            ",
            "
            DeER.. s0 = 0x1
            D..... a0 = s0
            D=eER. a1 = a0 + 0x1
            .DeeER trap
            ",
        );
    }

    #[test]
    fn test_memory_accesses() {
        assert_timeline(
            test_config(),
            "
            a0 = s1
            ra = u64 [sp + 0x30]
            s0 = u64 [sp + 0x28]
            s1 = u64 [sp + 0x20]
            sp = sp + 0x38
            ret
            ",
            "
            D............................ a0 = s1
            DeeeeER...................... ra = u64 [sp + 0x30]
            DeeeeER...................... s0 = u64 [sp + 0x28]
            DeeeeER...................... s1 = u64 [sp + 0x20]
            .DeE--R...................... sp = sp + 0x38
            .D===eeeeeeeeeeeeeeeeeeeeeeER ret
            ",
        );
    }

    #[test]
    fn test_empty() {
        assert_timeline(
            test_config(),
            "
            fallthrough
            ",
            "
            DeeER fallthrough
            ",
        );
    }

    #[test]
    fn test_overwrite_register() {
        assert_timeline(
            test_config(),
            "
            s0 = u64 [sp]
            s0 = a1 + a2
            s0 = u64 [s0]
            jump [s0]
            ",
            "
            DeeeeER....................... s0 = u64 [sp]
            DeE---R....................... s0 = a1 + a2
            D=eeeeER...................... s0 = u64 [s0]
            .D====eeeeeeeeeeeeeeeeeeeeeeER jump [s0]
            ",
        );
    }

    #[test]
    fn test_load_and_jump() {
        assert_timeline(
            test_config(),
            "
            @0:
            a2 = u8 [a0 + 11]
            jump @0 if a2 == 0
            ",
            "
            DeeeeER. a2 = u8 [a0 + 0xb]
            D====eER jump 0 if a2 == 0
            ",
        );
    }

    #[test]
    fn test_complex() {
        assert_timeline(
            test_config(),
            "
            a2 = i16 [a0 + 0x6]
            a1 = a1 & 0x7
            a3 = 0x1
            a1 = a1 << 0x8
            a2 = a2 & 0xfffffffffffff8ff
            a1 = a1 | a2
            a2 = a1 + a3
            u8 [a0 + 0x2] = a3
            trap
            ",
            "
            DeeeeER....................... a2 = i16 [a0 + 0x6]
            DeE---R....................... a1 = a1 & 0x7
            DeE---R....................... a3 = 0x1
            D=eE--R....................... a1 = a1 << 0x8
            .D===eER...................... a2 = a2 & 0xfffffffffffff8ff
            .D====eER..................... a1 = a1 | a2
            .D=====eER.................... a2 = a1 + a3
            ..DeeeeeeeeeeeeeeeeeeeeeeeeeER u8 [a0 + 0x2] = a3
            ..DeeE-----------------------R trap
            ",
        );
    }

    #[test]
    fn test_even_more_complex() {
        assert_timeline(
            test_config(),
            "
            @0:
            i32 a1 = clz a0
            i32 a0 = a0 << a1
            a1 = a1 << 0x17
            i32 a2 = a0 >> 0x8
            a3 = a0 >> 0x7
            a3 = a3 & ~a2
            i32 a2 = a2 - a1
            a0 = a0 << 0x18
            a3 = a3 & 0x1
            i32 a0 = a0 - a3
            i32 a0 = a0 >> 0x1f
            a1 = a2 + 0x4e800000
            i32 a0 = a0 + a1
            a1 = 0x46008c00
            ra = 0x24
            jump @0
            ",
            "
            DeER..................... i32 a1 = clz a0
            D=eeER................... i32 a0 = a0 << a1
            .DeE-R................... a1 = a1 << 0x17
            .D==eeER................. i32 a2 = a0 >> 0x8
            ..D=eE-R................. a3 = a0 >> 0x7
            ...D==eeER............... a3 = a3 & ~a2
            ....D=eeER............... i32 a2 = a2 - a1
            ....DeE--R............... a0 = a0 << 0x18
            ....D===eER.............. a3 = a3 & 0x1
            .....D===eeER............ i32 a0 = a0 - a3
            .....D=====eeER.......... i32 a0 = a0 >> 0x1f
            ......D=eE----R.......... a1 = a2 + 0x4e800000
            ......D======eeER........ i32 a0 = a0 + a1
            .......DeE------R........ a1 = 0x46008c00
            .......DeE------R........ ra = 0x24
            .......DeeeeeeeeeeeeeeeER jump 0
            ",
        );
    }

    #[test]
    fn test_super_complex_l1() {
        assert_timeline(
            CacheModel::L1Hit,
            "
            @0:
            unlikely
            t1 = u8 [s0]
            a1 = u8 [s0 + 0x11]
            a2 = 0x172d0
            a3 = u8 [s0 + 0x16]
            t0 = sp + 0x58
            a1 = a1 << 0x3
            a1 = a1 + a2
            a2 = u8 [a1]
            a5 = u8 [a1 + 0x1]
            s1 = u8 [a1 + 0x2]
            a4 = u8 [a1 + 0x3]
            a3 = a3 + t0
            a5 = a5 << 0x8
            s1 = s1 << 0x10
            a4 = a4 << 0x18
            a2 = a2 | a5
            a5 = u8 [a1 + 0x4]
            a0 = u8 [a1 + 0x5]
            a4 = a4 | s1
            s1 = u8 [a1 + 0x6]
            a1 = u8 [a1 + 0x7]
            a0 = a0 << 0x8
            a0 = a0 | a5
            s1 = s1 << 0x10
            a1 = a1 << 0x18
            a1 = a1 | s1
            a2 = a2 | a4
            a0 = a0 | a1
            a1 = s0 - t1
            a0 = a0 << 0x20
            a0 = a0 | a2
            u64 [sp + 0x58] = a0
            a0 = u8 [a3]
            a1 = u8 [a1 + 0x4]
            a0 = a1 * a0
            a1 = u8 [s0 + 0x23]
            jump @0 if a1 != 0
            ",
            "
            DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER............................ unlikely
            DeeeeE------------------------------------R............................ t1 = u8 [s0]
            DeeeeE------------------------------------R............................ a1 = u8 [s0 + 0x11]
            DeE---------------------------------------R............................ a2 = 0x172d0
            .DeeeeE-----------------------------------R............................ a3 = u8 [s0 + 0x16]
            .DeE--------------------------------------R............................ t0 = sp + 0x58
            .D===eE-----------------------------------R............................ a1 = a1 << 0x3
            ..D===eE----------------------------------R............................ a1 = a1 + a2
            ..D====eeeeE------------------------------R............................ a2 = u8 [a1]
            ..D====eeeeE------------------------------R............................ a5 = u8 [a1 + 0x1]
            ..D====eeeeE------------------------------R............................ s1 = u8 [a1 + 0x2]
            ...D===eeeeE------------------------------R............................ a4 = u8 [a1 + 0x3]
            ...D==eE----------------------------------R............................ a3 = a3 + t0
            ...D=======eE-----------------------------R............................ a5 = a5 << 0x8
            ...D=======eE-----------------------------R............................ s1 = s1 << 0x10
            ....D======eE-----------------------------R............................ a4 = a4 << 0x18
            ....D=======eE----------------------------R............................ a2 = a2 | a5
            ....D======eeeeE--------------------------R............................ a5 = u8 [a1 + 0x4]
            ....D=======eeeeE-------------------------R............................ a0 = u8 [a1 + 0x5]
            .....D======eE----------------------------R............................ a4 = a4 | s1
            .....D=======eeeeE------------------------R............................ s1 = u8 [a1 + 0x6]
            .....D=======eeeeE------------------------R............................ a1 = u8 [a1 + 0x7]
            .....D==========eE------------------------R............................ a0 = a0 << 0x8
            ......D==========eE-----------------------R............................ a0 = a0 | a5
            ......D==========eE-----------------------R............................ s1 = s1 << 0x10
            ......D==========eE-----------------------R............................ a1 = a1 << 0x18
            ......D===========eE----------------------R............................ a1 = a1 | s1
            .......D=======eE-------------------------R............................ a2 = a2 | a4
            .......D===========eE---------------------R............................ a0 = a0 | a1
            .......D========eE------------------------R............................ a1 = s0 - t1
            ........D===========eE--------------------R............................ a0 = a0 << 0x20
            ........D============eE-------------------R............................ a0 = a0 | a2
            ...........................................DeeeeeeeeeeeeeeeeeeeeeeeeeER u64 [sp + 0x58] = a0
            ...........................................DeeeeE---------------------R a0 = u8 [a3]
            ...........................................DeeeeE---------------------R a1 = u8 [a1 + 0x4]
            ...........................................D====eeeE------------------R a0 = a1 * a0
            ............................................DeeeeE--------------------R a1 = u8 [s0 + 0x23]
            ............................................D====eE-------------------R jump 0 if a1 != 0
            ",
        );
    }

    #[test]
    fn test_super_complex_l2() {
        assert_timeline(
            CacheModel::L2Hit,
            "
            @0:
            unlikely
            t1 = u8 [s0]
            a1 = u8 [s0 + 0x11]
            a2 = 0x172d0
            a3 = u8 [s0 + 0x16]
            t0 = sp + 0x58
            a1 = a1 << 0x3
            a1 = a1 + a2
            a2 = u8 [a1]
            a5 = u8 [a1 + 0x1]
            s1 = u8 [a1 + 0x2]
            a4 = u8 [a1 + 0x3]
            a3 = a3 + t0
            a5 = a5 << 0x8
            s1 = s1 << 0x10
            a4 = a4 << 0x18
            a2 = a2 | a5
            a5 = u8 [a1 + 0x4]
            a0 = u8 [a1 + 0x5]
            a4 = a4 | s1
            s1 = u8 [a1 + 0x6]
            a1 = u8 [a1 + 0x7]
            a0 = a0 << 0x8
            a0 = a0 | a5
            s1 = s1 << 0x10
            a1 = a1 << 0x18
            a1 = a1 | s1
            a2 = a2 | a4
            a0 = a0 | a1
            a1 = s0 - t1
            a0 = a0 << 0x20
            a0 = a0 | a2
            u64 [sp + 0x58] = a0
            a0 = u8 [a3]
            a1 = u8 [a1 + 0x4]
            a0 = a1 * a0
            a1 = u8 [s0 + 0x23]
            jump @0 if a1 != 0
            ",
            "
            DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER..................................................................... unlikely
            DeeeeeeeeeeeeeeeeeeeeeeeeeE---------------R..................................................................... t1 = u8 [s0]
            DeeeeeeeeeeeeeeeeeeeeeeeeeE---------------R..................................................................... a1 = u8 [s0 + 0x11]
            DeE---------------------------------------R..................................................................... a2 = 0x172d0
            .DeeeeeeeeeeeeeeeeeeeeeeeeeE--------------R..................................................................... a3 = u8 [s0 + 0x16]
            .DeE--------------------------------------R..................................................................... t0 = sp + 0x58
            .D========================eE--------------R..................................................................... a1 = a1 << 0x3
            ..D========================eE-------------R..................................................................... a1 = a1 + a2
            ..D=========================eeeeeeeeeeeeeeeeeeeeeeeeeER......................................................... a2 = u8 [a1]
            ..D=========================eeeeeeeeeeeeeeeeeeeeeeeeeER......................................................... a5 = u8 [a1 + 0x1]
            ..D=========================eeeeeeeeeeeeeeeeeeeeeeeeeER......................................................... s1 = u8 [a1 + 0x2]
            ...D========================eeeeeeeeeeeeeeeeeeeeeeeeeER......................................................... a4 = u8 [a1 + 0x3]
            ...D=======================eE-------------------------R......................................................... a3 = a3 + t0
            ...D=================================================eER........................................................ a5 = a5 << 0x8
            ...D=================================================eER........................................................ s1 = s1 << 0x10
            ....D================================================eER........................................................ a4 = a4 << 0x18
            ....D=================================================eER....................................................... a2 = a2 | a5
            ....D================================================eeeeeeeeeeeeeeeeeeeeeeeeeER................................ a5 = u8 [a1 + 0x4]
            ....D=================================================eeeeeeeeeeeeeeeeeeeeeeeeeER............................... a0 = u8 [a1 + 0x5]
            .....D================================================eE------------------------R............................... a4 = a4 | s1
            .....D=================================================eeeeeeeeeeeeeeeeeeeeeeeeeER.............................. s1 = u8 [a1 + 0x6]
            .....D=================================================eeeeeeeeeeeeeeeeeeeeeeeeeER.............................. a1 = u8 [a1 + 0x7]
            .....D=========================================================================eER.............................. a0 = a0 << 0x8
            ......D=========================================================================eER............................. a0 = a0 | a5
            ......D=========================================================================eER............................. s1 = s1 << 0x10
            ......D=========================================================================eER............................. a1 = a1 << 0x18
            ......D==========================================================================eER............................ a1 = a1 | s1
            .......D======================================================================eE---R............................ a2 = a2 | a4
            .......D==========================================================================eER........................... a0 = a0 | a1
            .......D==================eE--------------------------------------------------------R........................... a1 = s0 - t1
            ........D==========================================================================eER.......................... a0 = a0 << 0x20
            ........D===========================================================================eER......................... a0 = a0 | a2
            ...........................................D=========================================eeeeeeeeeeeeeeeeeeeeeeeeeER u64 [sp + 0x58] = a0
            ...........................................D===================================eeeeeeeeeeeeeeeeeeeeeeeeeE------R a0 = u8 [a3]
            ...........................................D=====================================eeeeeeeeeeeeeeeeeeeeeeeeeE----R a1 = u8 [a1 + 0x4]
            ...........................................D==============================================================eeeE-R a0 = a1 * a0
            ............................................D====================================eeeeeeeeeeeeeeeeeeeeeeeeeE----R a1 = u8 [s0 + 0x23]
            ............................................D=============================================================eE---R jump 0 if a1 != 0
            ",
        );
    }

    #[test]
    fn test_l3_loads() {
        assert_timeline(
            CacheModel::L3Hit,
            "
            a0 = u64 [a0]
            a0 = u64 [a0]
            a0 = u64 [a0]
            a0 = u64 [a0]
            ret
            ",
            "
            DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER............................................................................................................... a0 = u64 [a0]
            D=====================================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER.......................................................................... a0 = u64 [a0]
            D==========================================================================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER..................................... a0 = u64 [a0]
            D===============================================================================================================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER a0 = u64 [a0]
            .DeeeeeeeeeeeeeeeeeeeeeeE-----------------------------------------------------------------------------------------------------------------------------R ret
            ",
        );
    }

    #[test]
    fn test_ecalli() {
        assert_timeline(
            test_config(),
            "
            ecalli 27
            ret
            ",
            "
            DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER ecalli 27
            .DeeeeeeeeeeeeeeeeeeeeeeE-----------------------------------------------------------------------------R ret
            ",
        );
    }

    #[test]
    fn test_xor_and_shift() {
        assert_timeline(
            test_config(),
            "
            a1 = a1 ^ 0xffffffffffffffff
            a1 = a0 >> a1
            fallthrough
            ",
            "
            DeER.. a1 = a1 ^ 0xffffffffffffffff
            D=eER. a1 = a0 >> a1
            .DeeER fallthrough
            ",
        );
    }

    #[test]
    fn test_move_reg_decode_slots() {
        assert_timeline(
            test_config(),
            "
            s0 = a1
            a0 = a1
            a1 = t0
            a2 = s1
            trap
            ",
            "
            D..... s0 = a1
            D..... a0 = a1
            D..... a1 = t0
            D..... a2 = s1
            .DeeER trap
            ",
        );
    }
}