1#![allow(clippy::undocumented_unsafe_blocks)]
2#![allow(unsafe_code)]
3
4use crate::cast::cast;
5use crate::program::{InstructionFormat, InstructionSet, InstructionSetKind, Opcode, ParsingVisitor, RawReg, UNUSED_RAW_OPCODE};
6use crate::utils::{Bitness, BitnessT, GasVisitorT, B64};
7use alloc::string::String;
8use alloc::vec;
9
10#[cfg(feature = "simd")]
11use picosimd::amd64::{
12 avx2::i8x32,
13 avx2_composite::{i16x32, i32x32},
14 sse::i8x16,
15};
16
17#[cfg(not(feature = "simd"))]
18use picosimd::fallback::{i16x32, i32x32, i8x16, i8x32};
19
/// Runs the given tokens as-is.
///
/// This variant is selected whenever the `simd` feature is disabled or the
/// target is not x86_64, in which case no `unsafe` wrapper is added.
#[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
macro_rules! unsafe_avx2 {
    ($($body:tt)*) => { $($body)* };
}

/// Runs the given tokens inside an `unsafe` block.
///
/// This variant is selected when the `simd` feature is enabled on x86_64.
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
macro_rules! unsafe_avx2 {
    ($($body:tt)*) => { unsafe { $($body)* } };
}
29
/// Describes how expensive memory accesses are assumed to be by the simulator.
#[derive(Copy, Clone, Debug, Hash)]
pub struct CacheModel {
    /// Latency (in cycles) used for load instructions; see `Simulator::load_cost`.
    pub memory_access_cost: i8,
}

// The presets are named after cache levels instead of using SCREAMING_SNAKE_CASE.
#[allow(non_upper_case_globals)]
impl CacheModel {
    /// Preset with a memory access cost of 4.
    pub const L1Hit: Self = Self { memory_access_cost: 4 };
    /// Preset with a memory access cost of 25.
    pub const L2Hit: Self = Self { memory_access_cost: 25 };
    /// Preset with a memory access cost of 37.
    pub const L3Hit: Self = Self { memory_access_cost: 37 };
}
41
#[cfg(feature = "arbitrary")]
impl arbitrary::Arbitrary<'_> for CacheModel {
    /// Generates a cache model whose `memory_access_cost` is always in `1..=i8::MAX`.
    fn arbitrary(u: &mut arbitrary::Unstructured) -> arbitrary::Result<Self> {
        let raw = <i8 as arbitrary::Arbitrary>::arbitrary(u)?;

        // `i8::MIN.abs()` overflows (a panic with overflow checks enabled, and a
        // still-negative value without them), so saturate to `i8::MAX` instead.
        Ok(Self {
            memory_access_cost: raw.saturating_abs().max(1),
        })
    }
}
50
/// Decode slots available in a single simulated cycle; the per-cycle decode
/// budget is reset to this value on every tick.
const MAX_DECODE_PER_CYCLE: u32 = 4;

/// Number of slots in the reorder buffer; slot indices wrap around modulo this value.
const REORDER_BUFFER_SIZE: usize = 32;

/// NOTE(review): not referenced in the visible part of this file; presumably
/// slack applied to a computed gas cost — confirm at the use site.
const GAS_COST_SLACK: i32 = 3;
59
/// The simulated cost of a single instruction.
#[derive(Copy, Clone, Debug)]
pub struct InstCost {
    /// Initial value of the instruction's remaining-cycles counter; must not be negative.
    pub latency: i8,
    /// Decode slots consumed from the per-cycle decode budget when the instruction is dispatched.
    pub decode_slots: u32,
    /// ALU slots held while executing; at most `MAX_ALU_SLOTS`.
    pub alu_slots: u32,
    /// Multiplier slots held while executing; at most `MAX_MUL_SLOTS`.
    pub mul_slots: u32,
    /// Divider slots held while executing; at most `MAX_DIV_SLOTS`.
    pub div_slots: u32,
    /// Load slots held while executing; at most `MAX_LOAD_SLOTS`.
    pub load_slots: u32,
    /// Store slots held while executing; at most `MAX_STORE_SLOTS`.
    pub store_slots: u32,
}
70
// Total number of execution slots of each kind. The simulator starts with all
// of them available, and a single instruction may not request more than this.
const MAX_ALU_SLOTS: u32 = 4;
const MAX_LOAD_SLOTS: u32 = 4;
const MAX_STORE_SLOTS: u32 = 4;
const MAX_MUL_SLOTS: u32 = 1;
const MAX_DIV_SLOTS: u32 = 1;
76
/// Returns the number of bits needed to represent every integer in `0..=value`.
///
/// `bits_needed(0)` is `0`, `bits_needed(1)` is `1` and `bits_needed(4)` is `3`.
const fn bits_needed(value: u32) -> u32 {
    // Position of the highest set bit, plus one. Unlike rounding `value + 1` up
    // to a power of two this cannot overflow, so it is valid for every `u32`.
    u32::BITS - value.leading_zeros()
}
80
// Width (in bits) of each counter inside the packed resources word; each is
// just wide enough to hold the corresponding `MAX_*_SLOTS` value.
const ALU_BITS: u32 = bits_needed(MAX_ALU_SLOTS);
const LOAD_BITS: u32 = bits_needed(MAX_LOAD_SLOTS);
const STORE_BITS: u32 = bits_needed(MAX_STORE_SLOTS);
const MUL_BITS: u32 = bits_needed(MAX_MUL_SLOTS);
const DIV_BITS: u32 = bits_needed(MAX_DIV_SLOTS);
86
// Compile-time check that every counter field is wide enough to store the
// maximum number of slots of its kind.
#[allow(clippy::int_plus_one)]
const _: () = {
    assert!((1 << ALU_BITS) - 1 >= MAX_ALU_SLOTS);
    assert!((1 << LOAD_BITS) - 1 >= MAX_LOAD_SLOTS);
    assert!((1 << STORE_BITS) - 1 >= MAX_STORE_SLOTS);
    assert!((1 << MUL_BITS) - 1 >= MAX_MUL_SLOTS);
    assert!((1 << DIV_BITS) - 1 >= MAX_DIV_SLOTS);
};
95
// Bit offset of each counter inside the packed resources word. Every field is
// followed by one extra bit (the `+ 1`), which is reserved for detecting a
// borrow out of that field.
const ALU_OFFSET: u32 = 0;
const LOAD_OFFSET: u32 = ALU_OFFSET + ALU_BITS + 1;
const STORE_OFFSET: u32 = LOAD_OFFSET + LOAD_BITS + 1;
const MUL_OFFSET: u32 = STORE_OFFSET + STORE_BITS + 1;
const DIV_OFFSET: u32 = MUL_OFFSET + MUL_BITS + 1;
101
/// Mask of the guard bits sitting directly above every counter field.
///
/// The guard bits are kept set in the word tracking the available resources.
/// Subtracting a request that exceeds what is available borrows from the guard
/// bit and clears it, so "enough resources" is checked by testing that all of
/// the guard bits are still set after the subtraction.
const RESOURCES_UNDERFLOW_MASK: u32 = (1 << (ALU_BITS + ALU_OFFSET))
    | (1 << (LOAD_BITS + LOAD_OFFSET))
    | (1 << (STORE_BITS + STORE_OFFSET))
    | (1 << (MUL_BITS + MUL_OFFSET))
    | (1 << (DIV_BITS + DIV_OFFSET));
107
/// Debug-printing wrapper around a packed resources word.
#[cfg(all(test, feature = "logging"))]
struct DebugResources(u32);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugResources {
    /// Prints every counter field of the packed word (guard bits are masked out).
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        let packed = self.0;
        // Extracts the `width`-bit counter stored at bit `offset`.
        let extract = |offset: u32, width: u32| -> u32 { (packed >> offset) & ((1 << width) - 1) };

        let mut output = fmt.debug_struct("Resources");
        output.field("alu", &extract(ALU_OFFSET, ALU_BITS));
        output.field("load", &extract(LOAD_OFFSET, LOAD_BITS));
        output.field("store", &extract(STORE_OFFSET, STORE_BITS));
        output.field("mul", &extract(MUL_OFFSET, MUL_BITS));
        output.field("div", &extract(DIV_OFFSET, DIV_BITS));
        output.finish()
    }
}
123
/// Debug-printing wrapper around per-slot dependency bitmasks.
#[cfg(all(test, feature = "logging"))]
struct DebugDeps([i32; 32]);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugDeps {
    /// Prints `{slot={bit,bit,...}, ...}` for every slot with a non-zero mask.
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.write_str("{")?;

        let mut entry_separator = "";
        for (nth, deps) in self.0.iter().copied().enumerate() {
            if deps == 0 {
                continue;
            }

            fmt.write_str(entry_separator)?;
            entry_separator = ", ";

            write!(fmt, "{nth}={{")?;
            let mut bit_separator = "";
            // List the set bits from the lowest to the highest.
            for slot in 0..i32::BITS {
                if deps & (1 << slot) == 0 {
                    continue;
                }

                fmt.write_str(bit_separator)?;
                bit_separator = ",";
                write!(fmt, "{slot}")?;
            }
            fmt.write_str("}")?;
        }

        fmt.write_str("}")
    }
}
154
/// Debug-printing wrapper around a per-slot byte mask.
#[cfg(all(test, feature = "logging"))]
struct DebugMask([i8; 32]);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugMask {
    /// Prints the indices of all non-zero lanes; a lane whose value is not `-1`
    /// is printed as `index={value}`.
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.write_str("{")?;
        // Zero lanes are filtered out here, so the loop below never sees them.
        let iter = self.0.into_iter().enumerate().filter(|(_, mask)| *mask != 0);
        let mut remaining = iter.clone().count();
        for (nth, mask) in iter {
            if mask == -1 {
                write!(fmt, "{nth}")?;
            } else {
                write!(fmt, "{nth}={{{mask}}}")?;
            }

            remaining -= 1;
            if remaining > 0 {
                fmt.write_str(", ")?;
            }
        }
        fmt.write_str("}")?;

        Ok(())
    }
}
183
/// Debug-printing wrapper around the register -> reorder buffer slot table.
#[cfg(all(test, feature = "logging"))]
struct DebugEntryByRegister([i8; 16]);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugEntryByRegister {
    /// Prints `{reg=slot, ...}` for every register whose entry is not `-1`.
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.write_str("{")?;

        // Counted over the whole table, independently of how many registers `Reg::ALL` yields.
        let mut remaining = self.0.iter().filter(|&&entry| entry != -1).count();
        let assigned = crate::program::Reg::ALL
            .into_iter()
            .zip(self.0.into_iter())
            .filter(|(_, entry)| *entry != -1);

        for (reg, entry) in assigned {
            write!(fmt, "{reg}={entry}")?;
            remaining -= 1;
            if remaining > 0 {
                fmt.write_str(", ")?;
            }
        }

        fmt.write_str("}")
    }
}
208
/// Debug-printing wrapper around the per-slot remaining cycle counters.
#[cfg(all(test, feature = "logging"))]
struct DebugCyclesRemaining([i8; 32]);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugCyclesRemaining {
    /// Prints `{index=count, ...}` for every slot, including the zero ones.
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.write_str("{")?;
        for (index, count) in self.0.iter().enumerate() {
            // Every slot is printed, so a separator goes before all but the first one.
            if index != 0 {
                fmt.write_str(", ")?;
            }
            write!(fmt, "{index}={count}")?;
        }
        fmt.write_str("}")
    }
}
229
/// Debug-printing wrapper around the per-slot pipeline states.
#[cfg(all(test, feature = "logging"))]
struct DebugState([i8; 32]);

#[cfg(all(test, feature = "logging"))]
impl core::fmt::Debug for DebugState {
    /// Prints `{slot=state, ...}` for every slot with a non-zero state.
    ///
    /// States 1 to 4 are shown as `D`, `w`, `e` and `X`; any other value is
    /// shown as a number.
    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
        fmt.write_str("{")?;

        let mut separator = "";
        for (nth, state) in self.0.iter().copied().enumerate() {
            if state == 0 {
                continue;
            }

            fmt.write_str(separator)?;
            separator = ", ";

            let symbol = match state {
                1 => Some('D'),
                2 => Some('w'),
                3 => Some('e'),
                4 => Some('X'),
                _ => None,
            };

            if let Some(state) = symbol {
                write!(fmt, "{nth}={state}")?;
            } else {
                write!(fmt, "{nth}={state}")?;
            }
        }

        fmt.write_str("}")
    }
}
265
266impl InstCost {
267 #[inline(always)]
268 const fn resources(&self) -> u32 {
269 assert!(self.alu_slots <= MAX_ALU_SLOTS);
270 assert!(self.mul_slots <= MAX_MUL_SLOTS);
271 assert!(self.div_slots <= MAX_DIV_SLOTS);
272 assert!(self.load_slots <= MAX_LOAD_SLOTS);
273 assert!(self.store_slots <= MAX_STORE_SLOTS);
274
275 (self.alu_slots << ALU_OFFSET)
276 | (self.load_slots << LOAD_OFFSET)
277 | (self.store_slots << STORE_OFFSET)
278 | (self.mul_slots << MUL_OFFSET)
279 | (self.div_slots << DIV_OFFSET)
280 }
281}
282
/// Baseline cost used with struct-update syntax (`..EMPTY_COST`): zero latency,
/// one decode slot and no execution resources.
const EMPTY_COST: InstCost = InstCost {
    latency: 0,
    decode_slots: 1,
    alu_slots: 0,
    mul_slots: 0,
    div_slots: 0,
    load_slots: 0,
    store_slots: 0,
};
292
/// The kind of a per-instruction event reported to a `Tracer`.
#[derive(Copy, Clone, Debug)]
pub enum EventKind {
    /// The instruction was dispatched into the reorder buffer.
    Decode,
    /// The instruction is decoded, but has not started executing yet.
    WaitingForDependencies,
    /// The instruction is executing and still has cycles remaining.
    Executing,
    /// The instruction is in its last execution cycle.
    Executed,
    /// The instruction has executed, but was not retired this cycle.
    WaitingForRetirement,
    /// The instruction was retired this cycle.
    Retired,
}

impl From<EventKind> for char {
    /// Maps every event kind to the one-character symbol used to display it.
    fn from(kind: EventKind) -> char {
        use EventKind::{Decode, Executed, Executing, Retired, WaitingForDependencies, WaitingForRetirement};

        match kind {
            Decode => 'D',
            WaitingForDependencies => '=',
            Executing => 'e',
            Executed => 'E',
            WaitingForRetirement => '-',
            Retired => 'R',
        }
    }
}
315
/// Observer of the per-instruction events produced by the simulator.
pub trait Tracer: Sized {
    /// Whether the simulator should call `on_event` at all. When this is
    /// `false` the simulator also skips the bookkeeping which is only needed
    /// to report events.
    const SHOULD_CALL_ON_EVENT: bool;

    /// Whether the simulator may advance multiple cycles in a single step
    /// while it is draining the pipeline. Queried before every such step.
    fn should_enable_fast_forward(&self) -> bool {
        true
    }

    /// Called with the current cycle, the index of the instruction and the
    /// kind of the event. Does nothing by default.
    fn on_event(&mut self, _cycle: u32, _instruction: u32, _event: EventKind) {}
}

/// The no-op tracer: no events are reported.
impl Tracer for () {
    const SHOULD_CALL_ON_EVENT: bool = false;
}
330
/// Simulates the execution of instructions on a simple out-of-order pipeline
/// with a 32-slot reorder buffer, counting how many cycles it takes.
pub struct Simulator<'a, B, T: Tracer = ()> {
    /// The program's code; inspected when estimating the cost of branches.
    code: &'a [u8],
    /// Number of cycles simulated so far.
    cycles: u32,
    /// Number of instructions dispatched so far.
    instructions: u32,
    /// Set once a terminating instruction was dispatched and the pipeline was drained.
    finished: bool,
    /// Decode slots still available in the current cycle.
    decode_slots_remaining_this_cycle: u32,
    /// Packed counters of free execution resources, with all of the guard bits set.
    resources_available: u32,
    /// Number of reorder buffer slots currently occupied.
    instructions_in_flight: u32,
    /// Slot from which instructions are retired, in order.
    reorder_buffer_head: u32,
    /// Slot which the next dispatched instruction is going to occupy.
    reorder_buffer_tail: u32,
    /// Per slot: index of the instruction in it; only maintained when
    /// `T::SHOULD_CALL_ON_EVENT` is true.
    rob_instruction: [u32; REORDER_BUFFER_SIZE],
    /// Per slot: 0 = free, 1 = decoding, 2 = waiting, 3 = executing, 4 = executed.
    rob_state: i8x32,
    /// Per slot: remaining execution cycles; initialized from the instruction's latency.
    rob_cycles_remaining: i8x32,
    /// Per slot: packed execution resources the instruction needs.
    rob_required_resources: i16x32,
    /// Per slot: bitmask of the slots this instruction depends on.
    rob_dependencies: i32x32,
    /// Per slot: bitmask of the slots which depend on this instruction.
    rob_depended_by: i32x32,
    /// Per slot: bitmask of the registers whose pending value comes from this instruction.
    registers_written_by_rob_entry: i16x32,
    /// Per register: slot of the instruction producing its value, or `-1` if there is none.
    rob_entry_by_register: i8x16,
    /// Supplies the latency of loads.
    cache_model: CacheModel,
    /// When set, overrides the heuristic deciding whether a branch is cheap.
    force_branch_is_cheap: Option<bool>,

    /// Raw opcode of `trap` in the selected instruction set, or `UNUSED_RAW_OPCODE`.
    opcode_trap: u8,
    /// Raw opcode of `unlikely` in the selected instruction set, or `UNUSED_RAW_OPCODE`.
    opcode_unlikely: u8,

    /// Receives the per-instruction events.
    tracer: T,
    _phantom: core::marker::PhantomData<B>,
}
378
379impl<'a, B, T> Simulator<'a, B, T>
380where
381 T: Tracer,
382 B: BitnessT,
383{
384 pub fn new(code: &'a [u8], isa: InstructionSetKind, cache_model: CacheModel, tracer: T) -> Self {
385 unsafe_avx2! {
386 let mut simulator = Simulator {
387 code,
388 rob_instruction: [0; REORDER_BUFFER_SIZE],
389 cycles: 0,
390 instructions: 0,
391 finished: false,
392 decode_slots_remaining_this_cycle: 0,
393 resources_available: 0,
394 rob_state: i8x32::zero(),
395 rob_cycles_remaining: i8x32::zero(),
396 rob_required_resources: i16x32::zero(),
397 rob_dependencies: i32x32::zero(),
398 rob_depended_by: i32x32::zero(),
399 registers_written_by_rob_entry: i16x32::zero(),
400 rob_entry_by_register: i8x16::zero(),
401 reorder_buffer_tail: 0,
402 cache_model,
403 tracer,
404 force_branch_is_cheap: None,
405 instructions_in_flight: 0,
406 reorder_buffer_head: 0,
407 opcode_trap: isa.opcode_to_u8(Opcode::trap).unwrap_or(UNUSED_RAW_OPCODE),
408 opcode_unlikely: isa.opcode_to_u8(Opcode::unlikely).unwrap_or(UNUSED_RAW_OPCODE),
409 _phantom: core::marker::PhantomData,
410 };
411
412 simulator.clear();
413 simulator
414 }
415 }
416
    /// Overrides the branch cost heuristic: `Some(true)` makes every branch
    /// cheap, `Some(false)` makes every branch expensive, and `None` restores
    /// the heuristic.
    pub fn set_force_branch_is_cheap(&mut self, value: Option<bool>) {
        self.force_branch_is_cheap = value;
    }
420
421 fn clear(&mut self) {
422 self.cycles = 0;
423 self.instructions = 0;
424 self.finished = false;
425 self.instructions_in_flight = 0;
426 self.decode_slots_remaining_this_cycle = MAX_DECODE_PER_CYCLE;
427 self.resources_available = InstCost {
428 alu_slots: MAX_ALU_SLOTS,
429 mul_slots: MAX_MUL_SLOTS,
430 div_slots: MAX_DIV_SLOTS,
431 load_slots: MAX_LOAD_SLOTS,
432 store_slots: MAX_STORE_SLOTS,
433 ..EMPTY_COST
434 }
435 .resources()
436 | RESOURCES_UNDERFLOW_MASK;
437
438 self.reorder_buffer_tail = 0;
439 self.reorder_buffer_head = 0;
440
441 unsafe_avx2! {
442 self.rob_entry_by_register = i8x16::negative_one();
443 self.rob_state = i8x32::zero();
444 self.rob_cycles_remaining = i8x32::zero();
445 self.rob_required_resources = i16x32::zero();
446 self.rob_dependencies = i32x32::zero();
447 self.rob_depended_by = i32x32::zero();
448 self.registers_written_by_rob_entry = i16x32::zero();
449 }
450
451 if T::SHOULD_CALL_ON_EVENT {
452 self.rob_instruction.fill(0);
453 }
454 }
455
456 fn emit_event(&mut self, slot: u32, kind: EventKind) {
457 if T::SHOULD_CALL_ON_EVENT {
458 self.tracer.on_event(self.cycles, self.rob_instruction[cast(slot).to_usize()], kind);
459 }
460 }
461
    /// Advances the simulation; a wrapper around `tick_cycle_avx2`, which is
    /// called through `unsafe_avx2!`.
    fn tick_cycle<const FAST_FORWARD: bool>(&mut self) {
        unsafe_avx2! {
            self.tick_cycle_avx2::<FAST_FORWARD>();
        }
    }
467
468 #[cfg_attr(all(feature = "simd", target_arch = "x86_64"), target_feature(enable = "avx2"))]
469 #[inline(never)]
470 fn emit_events_avx2(&mut self, mask: i8x32, event_kind: EventKind) {
471 if !T::SHOULD_CALL_ON_EVENT {
472 return;
473 }
474
475 let mut bits = mask.most_significant_bits();
476 while bits != 0 {
477 let slot = bits.trailing_zeros();
478 self.emit_event(slot, event_kind);
479 bits &= !(1 << slot);
480 }
481 }
482
    /// Returns the number of instructions currently occupying the reorder buffer.
    fn instructions_in_flight(&self) -> u32 {
        self.instructions_in_flight
    }
486
    /// Advances the pipeline by one cycle, or by multiple cycles at once when
    /// `FAST_FORWARD` is enabled and it is possible to do so.
    ///
    /// The stages are processed in this order:
    ///   1. executed instructions at the head of the reorder buffer are retired,
    ///   2. waiting instructions whose dependencies are satisfied and for which
    ///      there are enough free resources start executing,
    ///   3. executing instructions make progress and release their resources,
    ///   4. instructions which were being decoded start waiting.
    #[cfg_attr(all(feature = "simd", target_arch = "x86_64"), target_feature(enable = "avx2"))]
    fn tick_cycle_avx2<const FAST_FORWARD: bool>(&mut self) {
        // The encoding of the per-slot state; zero means that the slot is free.
        let state_decoding = i8x32::splat(1);
        let state_waiting = i8x32::splat(2);
        let state_executing = i8x32::splat(3);
        let state_executed = i8x32::splat(4);

        // Snapshot used by the "made no progress" check at the very end.
        #[cfg(test)]
        let original_state = self.rob_state;
        #[cfg(test)]
        let original_cycles_remaining = self.rob_cycles_remaining;
        #[cfg(test)]
        let original_dependencies = self.rob_dependencies;
        #[cfg(test)]
        let original_depended_by = self.rob_depended_by;
        #[cfg(test)]
        let original_entry_by_register = self.rob_entry_by_register;
        #[cfg(test)]
        let original_required_resources = self.rob_required_resources;
        #[cfg(test)]
        let original_decode_slots = self.decode_slots_remaining_this_cycle;
        #[cfg(test)]
        let original_reorder_buffer_head = self.reorder_buffer_head;
        #[cfg(test)]
        let original_resources_available = self.resources_available;
        #[cfg(test)]
        let original_instructions_in_flight = self.instructions_in_flight;

        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: state={:?}",
            self.cycles,
            DebugState(self.rob_state.to_array())
        );
        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: cycles={:?}",
            self.cycles,
            DebugCyclesRemaining(self.rob_cycles_remaining.to_array())
        );
        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: dependencies={:?}",
            self.cycles,
            DebugDeps(self.rob_dependencies.to_array())
        );
        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: depended_by={:?}",
            self.cycles,
            DebugDeps(self.rob_depended_by.to_array())
        );
        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: entry_by_register={:?}",
            self.cycles,
            DebugEntryByRegister(self.rob_entry_by_register.to_array())
        );
        #[cfg(all(test, feature = "logging"))]
        log::debug!(
            "tick_cycle_avx2[{}]: resources_available={:?}",
            self.cycles,
            DebugResources(self.resources_available)
        );

        // Invariant: the number of non-free slots matches the in-flight counter.
        debug_assert_eq!(
            self.rob_state.simd_eq(i8x32::zero()).most_significant_bits().count_zeros(),
            self.instructions_in_flight
        );

        // Stage 1: retire in order, starting from the head of the reorder buffer.
        {
            let is_waiting_for_retirement: i8x32 = self.rob_state.simd_eq(state_executed);
            // Rotate so that the head ends up at bit 0, then count how many
            // consecutive slots starting from the head have finished executing.
            let leading_count_to_retire = is_waiting_for_retirement
                .most_significant_bits()
                .rotate_right(self.reorder_buffer_head)
                .trailing_ones() as i32;

            // Build a mask with that many low bits set (the shift is done in 64
            // bits so that a count of 32 gives all ones) and rotate it back.
            let is_retired_this_cycle = i8x32::from_i1x32_sext(
                (cast(1_u64 << leading_count_to_retire).truncate_to_u32().wrapping_sub(1)).rotate_left(self.reorder_buffer_head) as i32,
            );

            // Nothing depends on the retired slots anymore.
            self.rob_dependencies = self
                .rob_dependencies
                .and_not(i32x32::splat(is_retired_this_cycle.most_significant_bits()));

            // Clear the dependents of the retired slots.
            self.rob_depended_by = self.rob_depended_by.and_not(i32x32::from_i8x32_sext(is_retired_this_cycle));

            // Mark the retired slots as free.
            self.rob_state = self.rob_state.and_not(is_retired_this_cycle);

            let retired_count = is_retired_this_cycle.most_significant_bits().count_ones();
            #[cfg(all(test, feature = "logging"))]
            if retired_count > 0 {
                log::debug!(
                    "tick_cycle_avx2[{}]: instructions_in_flight: {} -> {}",
                    self.cycles,
                    self.instructions_in_flight,
                    self.instructions_in_flight - retired_count
                );
            }

            self.instructions_in_flight -= retired_count;
            self.reorder_buffer_head = (self.reorder_buffer_head + retired_count) % (REORDER_BUFFER_SIZE as u32);

            self.emit_events_avx2(is_retired_this_cycle, EventKind::Retired);
            self.emit_events_avx2(
                is_waiting_for_retirement.and_not(is_retired_this_cycle),
                EventKind::WaitingForRetirement,
            );

            debug_assert_eq!(
                self.rob_state.simd_eq(i8x32::zero()).most_significant_bits().count_zeros(),
                self.instructions_in_flight
            );
        }

        // Stage 2: start executing the instructions which are ready.
        {
            const RESOURCES_UNDERFLOW_MASK_I16: i16 = RESOURCES_UNDERFLOW_MASK as u16 as i16;
            // A dependency counts as satisfied once its cycle counter is at or below zero.
            let is_executed: i8x32 = self.rob_cycles_remaining.simd_lt(i8x32::splat(1));
            let is_executed_mask: i32 = is_executed.most_significant_bits();
            let has_no_dependencies: i8x32 = (self.rob_dependencies.and_not(i32x32::splat(is_executed_mask)))
                .simd_eq(i32x32::zero())
                .clamp_to_i8_range();

            let mut is_waiting_to_start: i8x32 = self.rob_state.simd_eq(state_waiting) & has_no_dependencies;

            // Every iteration starts at most one instruction, so at most five start per tick.
            for _ in 0..5 {
                #[cfg(all(test, feature = "logging"))]
                if is_waiting_to_start.most_significant_bits() != 0 {
                    log::debug!(
                        "tick_cycle_avx2[{}]: is_waiting_to_start={:?}",
                        self.cycles,
                        DebugMask(is_waiting_to_start.to_array())
                    );
                }
                debug_assert_eq!(self.resources_available & RESOURCES_UNDERFLOW_MASK, RESOURCES_UNDERFLOW_MASK);

                // Tentatively subtract every slot's requirements from what is
                // available; a slot fits if none of the guard bits got cleared.
                let new_resources: i16x32 = i16x32::splat(self.resources_available as i16) - self.rob_required_resources;
                let have_enough_resources: i8x32 = (new_resources.and(i16x32::splat(RESOURCES_UNDERFLOW_MASK_I16)))
                    .simd_eq(i16x32::splat(RESOURCES_UNDERFLOW_MASK_I16))
                    .clamp_to_i8_range();
                let have_enough_resources = have_enough_resources.and(is_waiting_to_start);
                // Pick the candidate which is the closest to the head.
                let mask = have_enough_resources.most_significant_bits().rotate_right(self.reorder_buffer_head);
                let position = mask.trailing_zeros();
                if position != 32 {
                    let position = (position + self.reorder_buffer_head) % (REORDER_BUFFER_SIZE as u32);
                    #[cfg(all(test, feature = "logging"))]
                    log::debug!(
                        "tick_cycle_avx2[{}]: starting: instruction={}, slot={}",
                        self.cycles,
                        self.rob_instruction[cast(position).to_usize()],
                        position,
                    );

                    let resources_consumed = self.rob_required_resources.as_slice()[cast(position).to_usize()];
                    self.resources_available -= resources_consumed as u32;
                    // Waiting -> executing.
                    self.rob_state.as_slice_mut()[cast(position).to_usize()] += 1;
                    is_waiting_to_start.as_slice_mut()[cast(position).to_usize()] = 0;
                }
            }
            // Whatever is still in the waiting state did not start this cycle.
            self.emit_events_avx2(self.rob_state.simd_eq(state_waiting), EventKind::WaitingForDependencies);
        }

        let mut cycle_count = 1;
        // Stage 3: make progress on the executing instructions.
        {
            let is_executing: i8x32 = self.rob_state.simd_eq(state_executing);
            if FAST_FORWARD {
                // The smallest remaining cycle count among the executing slots;
                // the lanes of all of the other slots are forced to all ones.
                let max_cycles =
                    ((self.rob_cycles_remaining & is_executing) | (is_executing ^ i8x32::negative_one())).horizontal_min_unsigned();
                let max_cycles = cast(max_cycles).to_signed();

                #[cfg(all(test, feature = "logging"))]
                log::debug!("tick_cycle_avx2[{}]: max_cycles={}", self.cycles, max_cycles);
                // Only skip ahead if nothing was decoded in the current cycle.
                if max_cycles > 0 && self.decode_slots_remaining_this_cycle == MAX_DECODE_PER_CYCLE {
                    cycle_count = max_cycles;
                }
            }

            self.rob_cycles_remaining = self.rob_cycles_remaining.saturating_sub(i8x32::splat(cycle_count) & is_executing);

            let is_execution_finished: i8x32 = self.rob_cycles_remaining.simd_eq(i8x32::zero()) & is_executing;
            let is_execution_finished = is_execution_finished.to_i16x32_sext();

            #[cfg(all(test, feature = "logging"))]
            log::debug!(
                "tick_cycle_avx2[{}]: is_execution_finished={:?}",
                self.cycles,
                is_execution_finished
            );

            // Registers written by the finished instructions no longer have an in-flight producer.
            let retired_register_writes: i16 = (self.registers_written_by_rob_entry & is_execution_finished).bitwise_reduce();
            self.registers_written_by_rob_entry = self.registers_written_by_rob_entry.and_not(is_execution_finished);
            self.rob_entry_by_register = self.rob_entry_by_register.or(i8x16::from_i1x16_sext(retired_register_writes));

            // Give back the resources held by the finished instructions.
            let resources_released = cast((self.rob_required_resources & is_execution_finished).wrapping_reduce()).to_unsigned();
            self.resources_available += u32::from(resources_released);
            self.rob_required_resources = self.rob_required_resources.and_not(is_execution_finished);

            // A counter of -1 marks the transition from executing to executed.
            let is_last_cycle = self.rob_cycles_remaining.simd_eq(i8x32::negative_one());
            let has_cycles_remaining = self.rob_cycles_remaining.simd_gt(i8x32::negative_one());
            self.rob_state += i8x32::splat(1) & is_executing.and(is_last_cycle);
            self.emit_events_avx2(is_executing.and(is_last_cycle), EventKind::Executed);
            self.emit_events_avx2(is_executing.and(has_cycles_remaining), EventKind::Executing);
        }

        // Stage 4: decoding -> waiting.
        {
            let is_decoding = self.rob_state.simd_eq(state_decoding);
            self.rob_state += i8x32::splat(1) & is_decoding;
        }

        self.decode_slots_remaining_this_cycle = MAX_DECODE_PER_CYCLE;
        self.cycles += cast(i32::from(cycle_count)).to_unsigned();

        #[cfg(all(test, feature = "logging"))]
        {
            if self.rob_state != original_state {
                log::debug!("tick_cycle_avx2[{}]: state changed!", self.cycles);
            } else {
                log::debug!("tick_cycle_avx2[{}]: state did NOT change!", self.cycles);
            }
        }

        // In tests make sure that every tick changes something, to catch infinite loops.
        #[cfg(test)]
        {
            assert!(
                self.instructions_in_flight != original_instructions_in_flight
                    || self.reorder_buffer_head != original_reorder_buffer_head
                    || self.decode_slots_remaining_this_cycle != original_decode_slots
                    || self.resources_available != original_resources_available
                    || self.rob_state != original_state
                    || self.rob_cycles_remaining.max_signed(i8x32::negative_one())
                        != original_cycles_remaining.max_signed(i8x32::negative_one())
                    || self.rob_dependencies != original_dependencies
                    || self.rob_depended_by != original_depended_by
                    || self.rob_entry_by_register != original_entry_by_register
                    || self.rob_required_resources != original_required_resources,
                "made no progress"
            );
        }
    }
734
735 #[inline(always)]
736 fn tick_cycle_if_cannot_decode(&mut self, decode_slots: u32) {
737 let mut should_tick =
738 self.decode_slots_remaining_this_cycle < decode_slots || self.instructions_in_flight() == (REORDER_BUFFER_SIZE as u32);
739 while should_tick {
740 self.tick_cycle::<false>();
741 should_tick = self.instructions_in_flight() == (REORDER_BUFFER_SIZE as u32);
742 }
743 }
744
745 #[inline(always)]
746 fn wait_until_empty(&mut self) {
747 #[cfg(all(test, feature = "logging"))]
748 if self.instructions_in_flight() > 0 {
749 log::debug!("wait_until_empty[{}]: starting fast forward!", self.cycles);
750 }
751
752 while self.instructions_in_flight() > 0 {
753 if self.tracer.should_enable_fast_forward() {
754 self.tick_cycle::<true>();
755 } else {
756 self.tick_cycle::<false>();
757 }
758 }
759 }
760
761 fn dispatch_generic(&mut self, dst: Option<RawReg>, src1: Option<RawReg>, src2: Option<RawReg>, cost: InstCost) {
762 #[cfg(all(test, feature = "logging"))]
763 log::debug!(
764 "dispatch[{}]: instruction={:?}, dst={:?}, src=[{:?}, {:?}], slots={}, latency={}, alu={}, load={}, store={}, mul={}, div={}",
765 self.cycles,
766 self.instructions,
767 dst.map(|reg| reg.get()),
768 src1.map(|reg| reg.get()),
769 src2.map(|reg| reg.get()),
770 cost.decode_slots,
771 cost.latency,
772 cost.alu_slots,
773 cost.load_slots,
774 cost.store_slots,
775 cost.mul_slots,
776 cost.div_slots,
777 );
778
779 debug_assert!(cost.latency >= 0);
780 unsafe_avx2! { self.dispatch_generic_avx2(dst, src1, src2, cost) }
781 }
782
    /// Puts a new instruction into the reorder buffer slot at the tail.
    ///
    /// Waits (by ticking the pipeline) until the instruction can be decoded,
    /// records its latency, its required resources and its dependencies on the
    /// in-flight producers of `src1` and `src2`, and registers it as the
    /// producer of `dst`.
    #[cfg_attr(all(feature = "simd", target_arch = "x86_64"), target_feature(enable = "avx2"))]
    fn dispatch_generic_avx2(&mut self, dst: Option<RawReg>, src1: Option<RawReg>, src2: Option<RawReg>, cost: InstCost) {
        let dst = dst.map(|dst| dst.get());
        let src1 = src1.map(|src1| src1.get());
        let src2 = src2.map(|src2| src2.get());

        self.tick_cycle_if_cannot_decode(cost.decode_slots);
        if T::SHOULD_CALL_ON_EVENT {
            self.tracer.on_event(self.cycles, self.instructions, EventKind::Decode);
        }

        // Allocate the slot at the tail of the reorder buffer.
        let slot = self.reorder_buffer_tail;
        self.reorder_buffer_tail = (self.reorder_buffer_tail + 1) % (REORDER_BUFFER_SIZE as u32);
        // All ones in the lane of the new slot, zeros everywhere else.
        let slot_mask = i8x32::zero().set_dynamic(cast(slot).truncate_to_u8(), cast(0xff_u8).to_signed());

        self.rob_cycles_remaining = self.rob_cycles_remaining.set_dynamic(slot as u8, cost.latency);
        self.rob_required_resources.as_slice_mut()[slot as usize] = cost.resources() as u16 as i16;

        // Look up the slot producing each source register. A register without
        // an in-flight producer maps to -1, which becomes 0xffffffff here.
        let dependency_1: Option<u32> = src1
            .map(|src1| self.rob_entry_by_register.as_slice()[src1.to_usize()])
            .map(i32::from)
            .map(|x| cast(x).to_unsigned());
        let dependency_2: Option<u32> = src2
            .map(|src2| self.rob_entry_by_register.as_slice()[src2.to_usize()])
            .map(i32::from)
            .map(|x| cast(x).to_unsigned());
        match (dependency_1, dependency_2) {
            (Some(dependency_1), Some(dependency_2)) => {
                // Branchless: `base` is 1 if there is a producer and 0 if there
                // is none, in which case all of the updates below are no-ops
                // (the index collapses to slot 0 and a zero is OR-ed into it).
                let base_1 = (dependency_1 >> 31) ^ 1;
                let base_2 = (dependency_2 >> 31) ^ 1;
                let dependencies_mask = cast(base_1.wrapping_shl(dependency_1) | base_2.wrapping_shl(dependency_2)).to_signed();
                self.rob_dependencies.as_slice_mut()[slot as usize] = dependencies_mask;
                self.rob_depended_by.as_slice_mut()[(dependency_1 * base_1) as usize] |= cast(base_1 << slot).to_signed();
                self.rob_depended_by.as_slice_mut()[(dependency_2 * base_2) as usize] |= cast(base_2 << slot).to_signed();
            }
            (Some(dependency), None) | (None, Some(dependency)) => {
                // Same trick as above, for a single source register.
                let base = (dependency >> 31) ^ 1;
                self.rob_dependencies.as_slice_mut()[slot as usize] = cast(base.wrapping_shl(dependency)).to_signed();
                self.rob_depended_by.as_slice_mut()[(dependency * base) as usize] |= cast(base.wrapping_shl(slot)).to_signed();
            }
            (None, None) => {}
        }

        if let Some(dst) = dst {
            // Move the register's bit from whichever slot had it to the new slot.
            let dst_mask: i16x32 = i16x32::splat(cast(cast(1_u32 << dst.to_u32()).truncate_to_u16()).to_signed());
            self.registers_written_by_rob_entry =
                self.registers_written_by_rob_entry.and_not(dst_mask) | (slot_mask.to_i16x32_sext() & dst_mask);
            self.rob_entry_by_register.as_slice_mut()[dst.to_usize()] = cast(cast(slot).truncate_to_u8()).to_signed();
        }

        // The new instruction starts in the decoding state.
        self.rob_state = self.rob_state.set_dynamic(slot as u8, 1);
        if T::SHOULD_CALL_ON_EVENT {
            self.rob_instruction[cast(slot).to_usize()] = self.instructions;
        }

        self.instructions_in_flight += 1;
        self.decode_slots_remaining_this_cycle -= cost.decode_slots;
        self.instructions += 1;

        debug_assert_eq!(
            self.rob_state.simd_eq(i8x32::zero()).most_significant_bits().count_zeros(),
            self.instructions_in_flight
        );
    }
847
848 fn dispatch_move_reg_avx2(&mut self, dst: RawReg, src: RawReg) {
849 let dst = dst.get();
850 let src = src.get();
851
852 self.tick_cycle_if_cannot_decode(1);
853 if T::SHOULD_CALL_ON_EVENT {
854 self.tracer.on_event(self.cycles, self.instructions, EventKind::Decode);
855 }
856
857 let entry_by_register = self.rob_entry_by_register.as_slice_mut();
858 let registers_written_by_rob_entry = self.registers_written_by_rob_entry.as_slice_mut();
859 let old_slot = entry_by_register[dst.to_usize()];
860 if old_slot != -1 {
861 registers_written_by_rob_entry[old_slot as usize] &= !(1_i16 << dst.to_usize());
862 }
863
864 let new_slot = entry_by_register[src.to_usize()];
865 if new_slot != -1 {
866 registers_written_by_rob_entry[new_slot as usize] |= 1 << dst.to_usize();
867 }
868
869 entry_by_register[dst.to_usize()] = new_slot;
870 self.decode_slots_remaining_this_cycle -= 1;
871 self.instructions += 1;
872 }
873
    /// Dispatches an instruction with one destination and two source registers.
    fn dispatch_3op(&mut self, dst: RawReg, src1: RawReg, src2: RawReg, cost: InstCost) {
        self.dispatch_generic(Some(dst), Some(src1), Some(src2), cost);
    }
877
    /// Dispatches an instruction with one destination and one source register.
    fn dispatch_2op(&mut self, dst: RawReg, src: RawReg, cost: InstCost) {
        self.dispatch_generic(Some(dst), Some(src), None, cost);
    }
881
    /// Dispatches an instruction which only writes to a destination register.
    fn dispatch_1op_dst(&mut self, dst: RawReg, cost: InstCost) {
        self.dispatch_generic(Some(dst), None, None, cost);
    }
885
886 fn dispatch_finish(&mut self, latency: i8) {
887 self.dispatch_generic(
888 None,
889 None,
890 None,
891 InstCost {
892 latency,
893 decode_slots: 1,
894 ..EMPTY_COST
895 },
896 );
897
898 self.wait_until_empty();
899 self.finished = true;
900 }
901
902 fn load_cost(&self) -> InstCost {
903 InstCost {
904 latency: self.cache_model.memory_access_cost,
905 decode_slots: 1,
906 alu_slots: 1,
907 load_slots: 1,
908 ..EMPTY_COST
909 }
910 }
911
912 fn dispatch_indirect_load(&mut self, dst: RawReg, base: RawReg, _offset: u32, _size: u32) {
913 self.dispatch_2op(dst, base, self.load_cost());
914 }
915
916 fn dispatch_load(&mut self, dst: RawReg, _offset: u32, _size: u32) {
917 self.dispatch_1op_dst(dst, self.load_cost());
918 }
919
920 #[allow(clippy::unused_self)]
921 fn store_cost(&self) -> InstCost {
922 InstCost {
923 latency: 25,
924 decode_slots: 1,
925 alu_slots: 1,
926 store_slots: 1,
927 ..EMPTY_COST
928 }
929 }
930
931 fn dispatch_store(&mut self, src: RawReg, _offset: u32, _size: u32) {
932 self.dispatch_generic(None, Some(src), None, self.store_cost());
933 }
934
935 fn dispatch_store_imm(&mut self, _offset: u32, _size: u32) {
936 self.dispatch_generic(None, None, None, self.store_cost());
937 }
938
939 fn dispatch_store_indirect(&mut self, src: RawReg, base: RawReg, _offset: u32, _size: u32) {
940 self.dispatch_generic(None, Some(src), Some(base), self.store_cost());
941 }
942
943 fn dispatch_store_imm_indirect(&mut self, base: RawReg, _offset: u32, _size: u32) {
944 self.dispatch_generic(None, Some(base), None, self.store_cost());
945 }
946
947 fn get_branch_cost(&self, offset: u32, args_length: u32, jump_offset: u32) -> i8 {
948 const BRANCH_PREDICTION_HIT_COST: i8 = 1;
949 const BRANCH_PREDICTION_MISS_COST: i8 = 20;
950
951 if let Some(is_hit) = self.force_branch_is_cheap {
952 return if is_hit {
953 BRANCH_PREDICTION_HIT_COST
954 } else {
955 BRANCH_PREDICTION_MISS_COST
956 };
957 }
958
959 if self
960 .code
961 .get(cast(offset).to_usize() + cast(args_length).to_usize())
962 .map(|&opcode| opcode == self.opcode_unlikely || opcode == self.opcode_trap)
963 .unwrap_or(true)
964 {
965 return BRANCH_PREDICTION_HIT_COST;
966 }
967
968 if self
969 .code
970 .get(cast(jump_offset).to_usize())
971 .map(|&opcode| opcode == self.opcode_unlikely || opcode == self.opcode_trap)
972 .unwrap_or(true)
973 {
974 return BRANCH_PREDICTION_HIT_COST;
975 }
976
977 BRANCH_PREDICTION_MISS_COST
978 }
979
980 fn dispatch_branch(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, jump_offset: u32) {
981 self.dispatch_generic(
982 None,
983 Some(s1),
984 Some(s2),
985 InstCost {
986 latency: self.get_branch_cost(offset, args_length, jump_offset),
987 decode_slots: 1,
988 alu_slots: 1,
989 ..EMPTY_COST
990 },
991 );
992 self.wait_until_empty();
993 self.finished = true;
994 }
995
996 fn dispatch_branch_imm(&mut self, offset: u32, args_length: u32, s: RawReg, jump_offset: u32) {
997 self.dispatch_generic(
998 None,
999 Some(s),
1000 None,
1001 InstCost {
1002 latency: self.get_branch_cost(offset, args_length, jump_offset),
1003 decode_slots: 1,
1004 alu_slots: 1,
1005 ..EMPTY_COST
1006 },
1007 );
1008 self.wait_until_empty();
1009 self.finished = true;
1010 }
1011
1012 fn dispatch_trivial_2op_1c(&mut self, d: RawReg, s: RawReg) {
1013 self.dispatch_2op(
1014 d,
1015 s,
1016 InstCost {
1017 latency: 1,
1018 decode_slots: 1,
1019 alu_slots: 1,
1020 ..EMPTY_COST
1021 },
1022 );
1023 }
1024
1025 fn dispatch_trivial_2op_2c(&mut self, d: RawReg, s: RawReg) {
1026 self.dispatch_2op(
1027 d,
1028 s,
1029 InstCost {
1030 latency: 2,
1031 decode_slots: 1,
1032 alu_slots: 2,
1033 ..EMPTY_COST
1034 },
1035 );
1036 }
1037
1038 fn dispatch_simple_alu_2op(&mut self, d: RawReg, s: RawReg) {
1039 self.dispatch_2op(
1040 d,
1041 s,
1042 InstCost {
1043 latency: 1,
1044 decode_slots: 1 + u32::from(d.get() != s.get()),
1045 alu_slots: 1,
1046 ..EMPTY_COST
1047 },
1048 );
1049 }
1050
1051 fn dispatch_simple_alu_2op_32bit(&mut self, d: RawReg, s: RawReg) {
1052 self.dispatch_2op(
1053 d,
1054 s,
1055 InstCost {
1056 latency: 1 + i8::from(B::BITNESS == Bitness::B64),
1057 decode_slots: 1 + u32::from(d.get() != s.get()) + u32::from(B::BITNESS == Bitness::B64),
1058 alu_slots: 1,
1059 ..EMPTY_COST
1060 },
1061 );
1062 }
1063
1064 fn dispatch_simple_alu_3op(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
1065 self.dispatch_3op(
1066 d,
1067 s1,
1068 s2,
1069 InstCost {
1070 latency: 1,
1071 decode_slots: 1 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())),
1072 alu_slots: 1,
1073 ..EMPTY_COST
1074 },
1075 );
1076 }
1077
1078 fn dispatch_simple_alu_3op_32(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
1079 self.dispatch_3op(
1080 d,
1081 s1,
1082 s2,
1083 InstCost {
1084 latency: 1 + i8::from(B::BITNESS == Bitness::B64),
1085 decode_slots: 1 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())) + u32::from(B::BITNESS == Bitness::B64),
1086 alu_slots: 1,
1087 ..EMPTY_COST
1088 },
1089 );
1090 }
1091
1092 fn dispatch_shift(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
1093 self.dispatch_3op(
1094 d,
1095 s1,
1096 s2,
1097 InstCost {
1098 latency: 1,
1099 decode_slots: 2 + u32::from(d.get() != s1.get()),
1100 alu_slots: 1,
1101 ..EMPTY_COST
1102 },
1103 )
1104 }
1105
1106 fn dispatch_shift_32(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
1107 self.dispatch_3op(
1108 d,
1109 s1,
1110 s2,
1111 InstCost {
1112 latency: 1 + i8::from(B::BITNESS == Bitness::B64),
1113 decode_slots: 2 + u32::from(d.get() != s1.get()) + u32::from(B::BITNESS == Bitness::B64),
1114 alu_slots: 1,
1115 ..EMPTY_COST
1116 },
1117 )
1118 }
1119
1120 fn dispatch_shift_imm_alt(&mut self, d: RawReg, s: RawReg) {
1121 self.dispatch_2op(
1122 d,
1123 s,
1124 InstCost {
1125 latency: 1,
1126 decode_slots: 3,
1127 alu_slots: 1,
1128 ..EMPTY_COST
1129 },
1130 )
1131 }
1132
1133 fn dispatch_shift_imm_alt_32(&mut self, d: RawReg, s: RawReg) {
1134 self.dispatch_2op(
1135 d,
1136 s,
1137 InstCost {
1138 latency: 2,
1139 decode_slots: 4,
1140 alu_slots: 1,
1141 ..EMPTY_COST
1142 },
1143 )
1144 }
1145
1146 fn dispatch_compare(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
1147 self.dispatch_3op(
1148 d,
1149 s1,
1150 s2,
1151 InstCost {
1152 latency: 3,
1153 decode_slots: 3,
1154 alu_slots: 1,
1155 ..EMPTY_COST
1156 },
1157 )
1158 }
1159
1160 fn dispatch_compare_imm(&mut self, d: RawReg, s: RawReg) {
1161 self.dispatch_2op(
1162 d,
1163 s,
1164 InstCost {
1165 latency: 3,
1166 decode_slots: 3,
1167 alu_slots: 1,
1168 ..EMPTY_COST
1169 },
1170 )
1171 }
1172
1173 fn dispatch_cmov(&mut self, d: RawReg, s: RawReg, c: RawReg) {
1174 self.dispatch_3op(
1175 d,
1176 s,
1177 c,
1178 InstCost {
1179 latency: 2,
1180 decode_slots: 2,
1181 alu_slots: 1,
1182 ..EMPTY_COST
1183 },
1184 )
1185 }
1186
1187 fn dispatch_cmov_imm(&mut self, d: RawReg, c: RawReg) {
1188 self.dispatch_2op(
1189 d,
1190 c,
1191 InstCost {
1192 latency: 2,
1193 decode_slots: 3,
1194 alu_slots: 1,
1195 ..EMPTY_COST
1196 },
1197 )
1198 }
1199
1200 fn dispatch_min_max(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
1201 self.dispatch_3op(
1202 d,
1203 s1,
1204 s2,
1205 InstCost {
1206 latency: 3,
1207 decode_slots: 2 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())),
1208 alu_slots: 1,
1209 ..EMPTY_COST
1210 },
1211 )
1212 }
1213
1214 fn dispatch_division(&mut self, d: RawReg, s1: RawReg, s2: RawReg) {
1215 self.dispatch_3op(
1216 d,
1217 s1,
1218 s2,
1219 InstCost {
1220 latency: 60,
1221 decode_slots: 4,
1222 alu_slots: 1,
1223 div_slots: 1,
1224 ..EMPTY_COST
1225 },
1226 )
1227 }
1228}
1229
1230impl<'a, B, T> GasVisitorT for Simulator<'a, B, T>
1231where
1232 B: BitnessT,
1233 T: Tracer,
1234{
1235 #[inline]
1236 fn take_block_cost(&mut self) -> Option<u32> {
1237 if (self.instructions_in_flight() == 0) & self.finished {
1238 let cycles = self.cycles;
1239 self.clear();
1240
1241 let cycles = cast((cast(cycles).to_signed() - GAS_COST_SLACK).max(1)).to_unsigned();
1242 Some(cycles)
1243 } else {
1244 None
1245 }
1246 }
1247
1248 fn is_at_start_of_basic_block(&self) -> bool {
1249 self.instructions == 0
1250 }
1251}
1252
1253impl<'a, B, T> ParsingVisitor for Simulator<'a, B, T>
1254where
1255 B: BitnessT,
1256 T: Tracer,
1257{
1258 type ReturnTy = ();
1259
    /// Visitor hook for `xor`: forwards `d`, `s1` and `s2` to
    /// `dispatch_simple_alu_3op`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn xor(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op(d, s1, s2)
    }
1266
    /// Visitor hook for `and`: forwards `d`, `s1` and `s2` to
    /// `dispatch_simple_alu_3op`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn and(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op(d, s1, s2)
    }
1271
    /// Visitor hook for `or`: forwards `d`, `s1` and `s2` to
    /// `dispatch_simple_alu_3op`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn or(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op(d, s1, s2)
    }
1276
    /// Visitor hook for `add_64`: forwards `d`, `s1` and `s2` to
    /// `dispatch_simple_alu_3op`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn add_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op(d, s1, s2)
    }
1281
    /// Visitor hook for `sub_64`: forwards `d`, `s1` and `s2` to
    /// `dispatch_simple_alu_3op`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn sub_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op(d, s1, s2)
    }
1286
    /// Visitor hook for `add_32`: forwards `d`, `s1` and `s2` to
    /// `dispatch_simple_alu_3op_32`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn add_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op_32(d, s1, s2)
    }
1293
    /// Visitor hook for `sub_32`: forwards `d`, `s1` and `s2` to
    /// `dispatch_simple_alu_3op_32`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn sub_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_3op_32(d, s1, s2)
    }
1298
    /// Visitor hook for `xor_imm`: forwards `d` and `s` to `dispatch_simple_alu_2op`.
    /// The immediate `_imm` is ignored, so its value does not affect the modeled cost.
    #[inline(always)]
    fn xor_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _imm: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s)
    }
1305
    /// Visitor hook for `and_imm`: forwards `d` and `s` to `dispatch_simple_alu_2op`.
    /// The immediate `_imm` is ignored, so its value does not affect the modeled cost.
    #[inline(always)]
    fn and_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _imm: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s)
    }
1310
    /// Visitor hook for `or_imm`: forwards `d` and `s` to `dispatch_simple_alu_2op`.
    /// The immediate `_imm` is ignored, so its value does not affect the modeled cost.
    #[inline(always)]
    fn or_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _imm: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s)
    }
1315
    /// Visitor hook for `add_imm_64`: forwards `d` and `s` to `dispatch_simple_alu_2op`.
    /// The immediate `_imm` is ignored, so its value does not affect the modeled cost.
    #[inline(always)]
    fn add_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _imm: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s)
    }
1321
    /// Visitor hook for `shift_logical_right_imm_64`: forwards `d` and `s1` to
    /// `dispatch_simple_alu_2op`. The immediate `_s2` is ignored.
    #[inline(always)]
    fn shift_logical_right_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s1)
    }
1326
    /// Visitor hook for `shift_arithmetic_right_imm_64`: forwards `d` and `s1` to
    /// `dispatch_simple_alu_2op`. The immediate `_s2` is ignored.
    #[inline(always)]
    fn shift_arithmetic_right_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s1)
    }
1331
    /// Visitor hook for `shift_logical_left_imm_64`: forwards `d` and `s1` to
    /// `dispatch_simple_alu_2op`. The immediate `_s2` is ignored.
    #[inline(always)]
    fn shift_logical_left_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s1)
    }
1336
    /// Visitor hook for `rotate_right_imm_64`: forwards `d` and `s1` to
    /// `dispatch_simple_alu_2op`. The immediate `_c` is ignored.
    #[inline(always)]
    fn rotate_right_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _c: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s1)
    }
1341
    /// Visitor hook for `reverse_byte`: forwards `d` and `s` to
    /// `dispatch_simple_alu_2op`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn reverse_byte(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op(d, s)
    }
1346
    /// Visitor hook for `add_imm_32`: forwards `d` and `s` to
    /// `dispatch_simple_alu_2op_32bit`. The immediate `_imm` is ignored.
    #[inline(always)]
    fn add_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _imm: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op_32bit(d, s)
    }
1354
    /// Visitor hook for `shift_logical_right_imm_32`: forwards `d` and `s1` to
    /// `dispatch_simple_alu_2op_32bit`. The immediate `_s2` is ignored.
    #[inline(always)]
    fn shift_logical_right_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op_32bit(d, s1)
    }
1359
    /// Visitor hook for `shift_arithmetic_right_imm_32`: forwards `d` and `s1` to
    /// `dispatch_simple_alu_2op_32bit`. The immediate `_s2` is ignored.
    #[inline(always)]
    fn shift_arithmetic_right_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op_32bit(d, s1)
    }
1364
    /// Visitor hook for `shift_logical_left_imm_32`: forwards `d` and `s1` to
    /// `dispatch_simple_alu_2op_32bit`. The immediate `_s2` is ignored.
    #[inline(always)]
    fn shift_logical_left_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op_32bit(d, s1)
    }
1369
    /// Visitor hook for `rotate_right_imm_32`: forwards `d` and `s1` to
    /// `dispatch_simple_alu_2op_32bit`. The immediate `_c` is ignored.
    #[inline(always)]
    fn rotate_right_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _c: u32) -> Self::ReturnTy {
        self.dispatch_simple_alu_2op_32bit(d, s1)
    }
1374
    /// Visitor hook for `count_leading_zero_bits_32`: forwards `d` and `s` to
    /// `dispatch_trivial_2op_1c`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn count_leading_zero_bits_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }
1381
    /// Visitor hook for `count_leading_zero_bits_64`: forwards `d` and `s` to
    /// `dispatch_trivial_2op_1c`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn count_leading_zero_bits_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }
1386
    /// Visitor hook for `count_set_bits_32`: forwards `d` and `s` to
    /// `dispatch_trivial_2op_1c`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn count_set_bits_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }
1391
    /// Visitor hook for `count_set_bits_64`: forwards `d` and `s` to
    /// `dispatch_trivial_2op_1c`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn count_set_bits_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }
1396
    /// Visitor hook for `sign_extend_8`: forwards `d` and `s` to
    /// `dispatch_trivial_2op_1c`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn sign_extend_8(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }
1401
    /// Visitor hook for `sign_extend_16`: forwards `d` and `s` to
    /// `dispatch_trivial_2op_1c`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn sign_extend_16(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }
1406
    /// Visitor hook for `zero_extend_16`: forwards `d` and `s` to
    /// `dispatch_trivial_2op_1c`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn zero_extend_16(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_1c(d, s)
    }
1411
    /// Visitor hook for `count_trailing_zero_bits_32`: forwards `d` and `s` to
    /// `dispatch_trivial_2op_2c`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn count_trailing_zero_bits_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_2c(d, s)
    }
1418
    /// Visitor hook for `count_trailing_zero_bits_64`: forwards `d` and `s` to
    /// `dispatch_trivial_2op_2c`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn count_trailing_zero_bits_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy {
        self.dispatch_trivial_2op_2c(d, s)
    }
1423
    /// Visitor hook for `shift_logical_right_64`: forwards `d`, `s1` and `s2` to
    /// `dispatch_shift`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn shift_logical_right_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift(d, s1, s2)
    }
1430
    /// Visitor hook for `shift_arithmetic_right_64`: forwards `d`, `s1` and `s2` to
    /// `dispatch_shift`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn shift_arithmetic_right_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift(d, s1, s2)
    }
1435
    /// Visitor hook for `shift_logical_left_64`: forwards `d`, `s1` and `s2` to
    /// `dispatch_shift`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn shift_logical_left_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift(d, s1, s2)
    }
1440
    /// Visitor hook for `rotate_left_64`: forwards `d`, `s1` and `s2` to
    /// `dispatch_shift`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn rotate_left_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift(d, s1, s2)
    }
1445
    /// Visitor hook for `rotate_right_64`: forwards `d`, `s1` and `s2` to
    /// `dispatch_shift`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn rotate_right_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift(d, s1, s2)
    }
1450
    /// Visitor hook for `shift_logical_right_32`: forwards `d`, `s1` and `s2` to
    /// `dispatch_shift_32`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn shift_logical_right_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift_32(d, s1, s2)
    }
1457
    /// Visitor hook for `shift_arithmetic_right_32`: forwards `d`, `s1` and `s2` to
    /// `dispatch_shift_32`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn shift_arithmetic_right_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift_32(d, s1, s2)
    }
1462
    /// Visitor hook for `shift_logical_left_32`: forwards `d`, `s1` and `s2` to
    /// `dispatch_shift_32`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn shift_logical_left_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift_32(d, s1, s2)
    }
1467
    /// Visitor hook for `rotate_left_32`: forwards `d`, `s1` and `s2` to
    /// `dispatch_shift_32`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn rotate_left_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift_32(d, s1, s2)
    }
1472
    /// Visitor hook for `rotate_right_32`: forwards `d`, `s1` and `s2` to
    /// `dispatch_shift_32`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn rotate_right_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_shift_32(d, s1, s2)
    }
1477
    /// Visitor hook for `shift_logical_right_imm_alt_64`: forwards `d` and the
    /// register operand `s2` to `dispatch_shift_imm_alt`. The immediate `_s1` is ignored.
    #[inline(always)]
    fn shift_logical_right_imm_alt_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt(d, s2)
    }
1484
    /// Visitor hook for `shift_arithmetic_right_imm_alt_64`: forwards `d` and the
    /// register operand `s2` to `dispatch_shift_imm_alt`. The immediate `_s1` is ignored.
    #[inline(always)]
    fn shift_arithmetic_right_imm_alt_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt(d, s2)
    }
1489
    /// Visitor hook for `shift_logical_left_imm_alt_64`: forwards `d` and the
    /// register operand `s2` to `dispatch_shift_imm_alt`. The immediate `_s1` is ignored.
    #[inline(always)]
    fn shift_logical_left_imm_alt_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt(d, s2)
    }
1494
    /// Visitor hook for `rotate_right_imm_alt_64`: forwards `d` and `s` to
    /// `dispatch_shift_imm_alt`. The immediate `_c` is ignored.
    #[inline(always)]
    fn rotate_right_imm_alt_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _c: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt(d, s)
    }
1499
    /// Visitor hook for `shift_logical_right_imm_alt_32`: forwards `d` and the
    /// register operand `s2` to `dispatch_shift_imm_alt_32`. The immediate `_s1` is ignored.
    #[inline(always)]
    fn shift_logical_right_imm_alt_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt_32(d, s2)
    }
1506
    /// Visitor hook for `shift_arithmetic_right_imm_alt_32`: forwards `d` and the
    /// register operand `s2` to `dispatch_shift_imm_alt_32`. The immediate `_s1` is ignored.
    #[inline(always)]
    fn shift_arithmetic_right_imm_alt_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt_32(d, s2)
    }
1511
    /// Visitor hook for `shift_logical_left_imm_alt_32`: forwards `d` and the
    /// register operand `s2` to `dispatch_shift_imm_alt_32`. The immediate `_s1` is ignored.
    #[inline(always)]
    fn shift_logical_left_imm_alt_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s2: RawReg, _s1: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt_32(d, s2)
    }
1516
    /// Visitor hook for `rotate_right_imm_alt_32`: forwards `d` and `s` to
    /// `dispatch_shift_imm_alt_32`. The immediate `_c` is ignored.
    #[inline(always)]
    fn rotate_right_imm_alt_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, _c: u32) -> Self::ReturnTy {
        self.dispatch_shift_imm_alt_32(d, s)
    }
1521
    /// Visitor hook for `set_less_than_unsigned`: forwards `d`, `s1` and `s2` to
    /// `dispatch_compare`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn set_less_than_unsigned(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_compare(d, s1, s2)
    }
1528
    /// Visitor hook for `set_less_than_signed`: forwards `d`, `s1` and `s2` to
    /// `dispatch_compare`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn set_less_than_signed(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_compare(d, s1, s2)
    }
1533
    /// Visitor hook for `set_less_than_unsigned_imm`: forwards `d` and `s1` to
    /// `dispatch_compare_imm`. The immediate `_s2` is ignored.
    #[inline(always)]
    fn set_less_than_unsigned_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_compare_imm(d, s1)
    }
1540
    /// Visitor hook for `set_less_than_signed_imm`: forwards `d` and `s1` to
    /// `dispatch_compare_imm`. The immediate `_s2` is ignored.
    #[inline(always)]
    fn set_less_than_signed_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_compare_imm(d, s1)
    }
1545
    /// Visitor hook for `set_greater_than_unsigned_imm`: forwards `d` and `s1` to
    /// `dispatch_compare_imm`. The immediate `_s2` is ignored.
    #[inline(always)]
    fn set_greater_than_unsigned_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_compare_imm(d, s1)
    }
1550
    /// Visitor hook for `set_greater_than_signed_imm`: forwards `d` and `s1` to
    /// `dispatch_compare_imm`. The immediate `_s2` is ignored.
    #[inline(always)]
    fn set_greater_than_signed_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
        self.dispatch_compare_imm(d, s1)
    }
1555
    /// Visitor hook for `cmov_if_zero`: forwards `d`, `s` and `c` to `dispatch_cmov`.
    /// `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn cmov_if_zero(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, c: RawReg) -> Self::ReturnTy {
        self.dispatch_cmov(d, s, c)
    }
1562
    /// Visitor hook for `cmov_if_not_zero`: forwards `d`, `s` and `c` to `dispatch_cmov`.
    /// `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn cmov_if_not_zero(&mut self, _offset: u32, _args_length: u32, d: RawReg, s: RawReg, c: RawReg) -> Self::ReturnTy {
        self.dispatch_cmov(d, s, c)
    }
1567
    /// Visitor hook for `cmov_if_zero_imm`: forwards `d` and `c` to
    /// `dispatch_cmov_imm`. The immediate `_s` is ignored.
    #[inline(always)]
    fn cmov_if_zero_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, c: RawReg, _s: u32) -> Self::ReturnTy {
        self.dispatch_cmov_imm(d, c)
    }
1572
    /// Visitor hook for `cmov_if_not_zero_imm`: forwards `d` and `c` to
    /// `dispatch_cmov_imm`. The immediate `_s` is ignored.
    #[inline(always)]
    fn cmov_if_not_zero_imm(&mut self, _offset: u32, _args_length: u32, d: RawReg, c: RawReg, _s: u32) -> Self::ReturnTy {
        self.dispatch_cmov_imm(d, c)
    }
1577
    /// Visitor hook for `maximum`: forwards `d`, `s1` and `s2` to `dispatch_min_max`.
    /// `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn maximum(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_min_max(d, s1, s2)
    }
1584
    /// Visitor hook for `maximum_unsigned`: forwards `d`, `s1` and `s2` to
    /// `dispatch_min_max`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn maximum_unsigned(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_min_max(d, s1, s2)
    }
1589
    /// Visitor hook for `minimum`: forwards `d`, `s1` and `s2` to `dispatch_min_max`.
    /// `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn minimum(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_min_max(d, s1, s2)
    }
1594
    /// Visitor hook for `minimum_unsigned`: forwards `d`, `s1` and `s2` to
    /// `dispatch_min_max`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn minimum_unsigned(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_min_max(d, s1, s2)
    }
1599
    /// Visitor hook for `load_indirect_u8`: forwards `dst`, `base` and the operand
    /// `offset` to `dispatch_indirect_load`. The leading `_offset` is unused.
    /// The trailing `1` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_indirect_u8(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 1)
    }
1606
    /// Visitor hook for `load_indirect_i8`: forwards `dst`, `base` and the operand
    /// `offset` to `dispatch_indirect_load`. The leading `_offset` is unused.
    /// The trailing `1` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_indirect_i8(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 1)
    }
1611
    /// Visitor hook for `load_indirect_u16`: forwards `dst`, `base` and the operand
    /// `offset` to `dispatch_indirect_load`. The leading `_offset` is unused.
    /// The trailing `2` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_indirect_u16(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 2)
    }
1616
    /// Visitor hook for `load_indirect_i16`: forwards `dst`, `base` and the operand
    /// `offset` to `dispatch_indirect_load`. The leading `_offset` is unused.
    /// The trailing `2` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_indirect_i16(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 2)
    }
1621
    /// Visitor hook for `load_indirect_u32`: forwards `dst`, `base` and the operand
    /// `offset` to `dispatch_indirect_load`. The leading `_offset` is unused.
    /// The trailing `4` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_indirect_u32(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 4)
    }
1626
    /// Visitor hook for `load_indirect_i32`: forwards `dst`, `base` and the operand
    /// `offset` to `dispatch_indirect_load`. The leading `_offset` is unused.
    /// The trailing `4` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_indirect_i32(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 4)
    }
1631
    /// Visitor hook for `load_indirect_u64`: forwards `dst`, `base` and the operand
    /// `offset` to `dispatch_indirect_load`. The leading `_offset` is unused.
    /// The trailing `8` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_indirect_u64(&mut self, _offset: u32, _args_length: u32, dst: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_indirect_load(dst, base, offset, 8)
    }
1636
    /// Visitor hook for `load_u8`: forwards `dst` and the operand `offset` to
    /// `dispatch_load`. The leading `_offset` is unused.
    /// The trailing `1` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_u8(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 1)
    }
1643
    /// Visitor hook for `load_i8`: forwards `dst` and the operand `offset` to
    /// `dispatch_load`. The leading `_offset` is unused.
    /// The trailing `1` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_i8(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 1)
    }
1648
    /// Visitor hook for `load_u16`: forwards `dst` and the operand `offset` to
    /// `dispatch_load`. The leading `_offset` is unused.
    /// The trailing `2` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_u16(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 2)
    }
1653
    /// Visitor hook for `load_i16`: forwards `dst` and the operand `offset` to
    /// `dispatch_load`. The leading `_offset` is unused.
    /// The trailing `2` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_i16(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 2)
    }
1658
    /// Visitor hook for `load_u32`: forwards `dst` and the operand `offset` to
    /// `dispatch_load`. The leading `_offset` is unused.
    /// The trailing `4` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_u32(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 4)
    }
1663
    /// Visitor hook for `load_i32`: forwards `dst` and the operand `offset` to
    /// `dispatch_load`. The leading `_offset` is unused.
    /// The trailing `4` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_i32(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 4)
    }
1668
    /// Visitor hook for `load_u64`: forwards `dst` and the operand `offset` to
    /// `dispatch_load`. The leading `_offset` is unused.
    /// The trailing `8` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn load_u64(&mut self, _offset: u32, _args_length: u32, dst: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_load(dst, offset, 8)
    }
1673
    /// Visitor hook for `store_imm_indirect_u8`: forwards `base` and the operand
    /// `offset` to `dispatch_store_imm_indirect`. The stored `_value` is ignored.
    /// The trailing `1` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_imm_indirect_u8(&mut self, _offset: u32, _args_length: u32, base: RawReg, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm_indirect(base, offset, 1)
    }
1680
    /// Visitor hook for `store_imm_indirect_u16`: forwards `base` and the operand
    /// `offset` to `dispatch_store_imm_indirect`. The stored `_value` is ignored.
    /// The trailing `2` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_imm_indirect_u16(&mut self, _offset: u32, _args_length: u32, base: RawReg, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm_indirect(base, offset, 2)
    }
1685
    /// Visitor hook for `store_imm_indirect_u32`: forwards `base` and the operand
    /// `offset` to `dispatch_store_imm_indirect`. The stored `_value` is ignored.
    /// The trailing `4` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_imm_indirect_u32(&mut self, _offset: u32, _args_length: u32, base: RawReg, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm_indirect(base, offset, 4)
    }
1690
    /// Visitor hook for `store_imm_indirect_u64`: forwards `base` and the operand
    /// `offset` to `dispatch_store_imm_indirect`. The stored `_value` is ignored.
    /// The trailing `8` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_imm_indirect_u64(&mut self, _offset: u32, _args_length: u32, base: RawReg, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm_indirect(base, offset, 8)
    }
1695
    /// Visitor hook for `store_indirect_u8`: forwards `src`, `base` and the operand
    /// `offset` to `dispatch_store_indirect`. The leading `_offset` is unused.
    /// The trailing `1` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_indirect_u8(&mut self, _offset: u32, _args_length: u32, src: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store_indirect(src, base, offset, 1)
    }
1702
    /// Visitor hook for `store_indirect_u16`: forwards `src`, `base` and the operand
    /// `offset` to `dispatch_store_indirect`. The leading `_offset` is unused.
    /// The trailing `2` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_indirect_u16(&mut self, _offset: u32, _args_length: u32, src: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store_indirect(src, base, offset, 2)
    }
1707
    /// Visitor hook for `store_indirect_u32`: forwards `src`, `base` and the operand
    /// `offset` to `dispatch_store_indirect`. The leading `_offset` is unused.
    /// The trailing `4` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_indirect_u32(&mut self, _offset: u32, _args_length: u32, src: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store_indirect(src, base, offset, 4)
    }
1712
    /// Visitor hook for `store_indirect_u64`: forwards `src`, `base` and the operand
    /// `offset` to `dispatch_store_indirect`. The leading `_offset` is unused.
    /// The trailing `8` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_indirect_u64(&mut self, _offset: u32, _args_length: u32, src: RawReg, base: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store_indirect(src, base, offset, 8)
    }
1717
    /// Visitor hook for `store_imm_u8`: forwards the operand `offset` to
    /// `dispatch_store_imm`. The stored `_value` is ignored.
    /// The trailing `1` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_imm_u8(&mut self, _offset: u32, _args_length: u32, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm(offset, 1)
    }
1724
    /// Visitor hook for `store_imm_u16`: forwards the operand `offset` to
    /// `dispatch_store_imm`. The stored `_value` is ignored.
    /// The trailing `2` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_imm_u16(&mut self, _offset: u32, _args_length: u32, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm(offset, 2)
    }
1729
    /// Visitor hook for `store_imm_u32`: forwards the operand `offset` to
    /// `dispatch_store_imm`. The stored `_value` is ignored.
    /// The trailing `4` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_imm_u32(&mut self, _offset: u32, _args_length: u32, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm(offset, 4)
    }
1734
    /// Visitor hook for `store_imm_u64`: forwards the operand `offset` to
    /// `dispatch_store_imm`. The stored `_value` is ignored.
    /// The trailing `8` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_imm_u64(&mut self, _offset: u32, _args_length: u32, offset: u32, _value: u32) -> Self::ReturnTy {
        self.dispatch_store_imm(offset, 8)
    }
1739
    /// Visitor hook for `store_u8`: forwards `src` and the operand `offset` to
    /// `dispatch_store`. The leading `_offset` is unused.
    /// The trailing `1` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_u8(&mut self, _offset: u32, _args_length: u32, src: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store(src, offset, 1)
    }
1746
    /// Visitor hook for `store_u16`: forwards `src` and the operand `offset` to
    /// `dispatch_store`. The leading `_offset` is unused.
    /// The trailing `2` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_u16(&mut self, _offset: u32, _args_length: u32, src: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store(src, offset, 2)
    }
1751
    /// Visitor hook for `store_u32`: forwards `src` and the operand `offset` to
    /// `dispatch_store`. The leading `_offset` is unused.
    /// The trailing `4` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_u32(&mut self, _offset: u32, _args_length: u32, src: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store(src, offset, 4)
    }
1756
    /// Visitor hook for `store_u64`: forwards `src` and the operand `offset` to
    /// `dispatch_store`. The leading `_offset` is unused.
    /// The trailing `8` is presumably the access width in bytes (TODO confirm).
    #[inline(always)]
    fn store_u64(&mut self, _offset: u32, _args_length: u32, src: RawReg, offset: u32) -> Self::ReturnTy {
        self.dispatch_store(src, offset, 8)
    }
1761
    /// Visitor hook for `branch_less_unsigned`: forwards everything to
    /// `dispatch_branch`, with `imm` passed as its `jump_offset` argument.
    #[inline(always)]
    fn branch_less_unsigned(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }
1768
    /// Visitor hook for `branch_less_signed`: forwards everything to
    /// `dispatch_branch`, with `imm` passed as its `jump_offset` argument.
    #[inline(always)]
    fn branch_less_signed(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }
1773
    /// Visitor hook for `branch_greater_or_equal_unsigned`: forwards everything to
    /// `dispatch_branch`, with `imm` passed as its `jump_offset` argument.
    #[inline(always)]
    fn branch_greater_or_equal_unsigned(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }
1778
    /// Visitor hook for `branch_greater_or_equal_signed`: forwards everything to
    /// `dispatch_branch`, with `imm` passed as its `jump_offset` argument.
    #[inline(always)]
    fn branch_greater_or_equal_signed(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }
1783
    /// Visitor hook for `branch_eq`: forwards everything to `dispatch_branch`,
    /// with `imm` passed as its `jump_offset` argument.
    #[inline(always)]
    fn branch_eq(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }
1788
    /// Visitor hook for `branch_not_eq`: forwards everything to `dispatch_branch`,
    /// with `imm` passed as its `jump_offset` argument.
    #[inline(always)]
    fn branch_not_eq(&mut self, offset: u32, args_length: u32, s1: RawReg, s2: RawReg, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch(offset, args_length, s1, s2, imm)
    }
1793
    /// Visitor hook for `branch_eq_imm`: forwards to `dispatch_branch_imm`, with
    /// `imm` passed as its `jump_offset` argument. The immediate `_s2` is ignored.
    #[inline(always)]
    fn branch_eq_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }
1800
    /// Visitor hook for `branch_not_eq_imm`: forwards to `dispatch_branch_imm`, with
    /// `imm` passed as its `jump_offset` argument. The immediate `_s2` is ignored.
    #[inline(always)]
    fn branch_not_eq_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }
1805
    /// Visitor hook for `branch_less_unsigned_imm`: forwards to `dispatch_branch_imm`,
    /// with `imm` passed as its `jump_offset` argument. The immediate `_s2` is ignored.
    #[inline(always)]
    fn branch_less_unsigned_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }
1810
    /// Visitor hook for `branch_less_signed_imm`: forwards to `dispatch_branch_imm`,
    /// with `imm` passed as its `jump_offset` argument. The immediate `_s2` is ignored.
    #[inline(always)]
    fn branch_less_signed_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }
1815
    /// Visitor hook for `branch_greater_or_equal_unsigned_imm`: forwards to
    /// `dispatch_branch_imm`, with `imm` passed as its `jump_offset` argument.
    /// The immediate `_s2` is ignored.
    #[inline(always)]
    fn branch_greater_or_equal_unsigned_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }
1820
    /// Visitor hook for `branch_greater_or_equal_signed_imm`: forwards to
    /// `dispatch_branch_imm`, with `imm` passed as its `jump_offset` argument.
    /// The immediate `_s2` is ignored.
    #[inline(always)]
    fn branch_greater_or_equal_signed_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }
1825
    /// Visitor hook for `branch_less_or_equal_unsigned_imm`: forwards to
    /// `dispatch_branch_imm`, with `imm` passed as its `jump_offset` argument.
    /// The immediate `_s2` is ignored.
    #[inline(always)]
    fn branch_less_or_equal_unsigned_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }
1830
    /// Visitor hook for `branch_less_or_equal_signed_imm`: forwards to
    /// `dispatch_branch_imm`, with `imm` passed as its `jump_offset` argument.
    /// The immediate `_s2` is ignored.
    #[inline(always)]
    fn branch_less_or_equal_signed_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }
1835
    /// Visitor hook for `branch_greater_unsigned_imm`: forwards to
    /// `dispatch_branch_imm`, with `imm` passed as its `jump_offset` argument.
    /// The immediate `_s2` is ignored.
    #[inline(always)]
    fn branch_greater_unsigned_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }
1840
    /// Visitor hook for `branch_greater_signed_imm`: forwards to
    /// `dispatch_branch_imm`, with `imm` passed as its `jump_offset` argument.
    /// The immediate `_s2` is ignored.
    #[inline(always)]
    fn branch_greater_signed_imm(&mut self, offset: u32, args_length: u32, s1: RawReg, _s2: u32, imm: u32) -> Self::ReturnTy {
        self.dispatch_branch_imm(offset, args_length, s1, imm);
    }
1845
    /// Visitor hook for `div_unsigned_32`: forwards `d`, `s1` and `s2` to
    /// `dispatch_division`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn div_unsigned_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }
1852
    /// Visitor hook for `div_signed_32`: forwards `d`, `s1` and `s2` to
    /// `dispatch_division`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn div_signed_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }
1857
    /// Visitor hook for `rem_unsigned_32`: forwards `d`, `s1` and `s2` to
    /// `dispatch_division`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn rem_unsigned_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }
1862
    /// Visitor hook for `rem_signed_32`: forwards `d`, `s1` and `s2` to
    /// `dispatch_division`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn rem_signed_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }
1867
    /// Visitor hook for `div_unsigned_64`: forwards `d`, `s1` and `s2` to
    /// `dispatch_division`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn div_unsigned_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }
1872
    /// Visitor hook for `div_signed_64`: forwards `d`, `s1` and `s2` to
    /// `dispatch_division`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn div_signed_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }
1877
    /// Visitor hook for `rem_unsigned_64`: forwards `d`, `s1` and `s2` to
    /// `dispatch_division`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn rem_unsigned_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }
1882
    /// Visitor hook for `rem_signed_64`: forwards `d`, `s1` and `s2` to
    /// `dispatch_division`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn rem_signed_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
        self.dispatch_division(d, s1, s2)
    }
1887
1888 #[inline(always)]
1891 fn and_inverted(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
1892 self.dispatch_3op(
1894 d,
1895 s1,
1896 s2,
1897 InstCost {
1898 latency: 2,
1899 decode_slots: 3,
1900 alu_slots: 1,
1901 ..EMPTY_COST
1902 },
1903 )
1904 }
1905
1906 #[inline(always)]
1907 fn or_inverted(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
1908 self.dispatch_3op(
1910 d,
1911 s1,
1912 s2,
1913 InstCost {
1914 latency: 2,
1915 decode_slots: 3,
1916 alu_slots: 1,
1917 ..EMPTY_COST
1918 },
1919 )
1920 }
1921
1922 #[inline(always)]
1923 fn xnor(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
1924 self.dispatch_3op(
1925 d,
1926 s1,
1927 s2,
1928 InstCost {
1929 latency: 2,
1930 decode_slots: 2 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())),
1931 alu_slots: 1,
1932 ..EMPTY_COST
1933 },
1934 );
1935 }
1936
1937 #[inline(always)]
1938 fn negate_and_add_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
1939 self.dispatch_2op(
1940 d,
1941 s1,
1942 InstCost {
1943 latency: 2,
1944 decode_slots: 3,
1945 alu_slots: 1,
1946 ..EMPTY_COST
1947 },
1948 )
1949 }
1950
1951 #[inline(always)]
1952 fn negate_and_add_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
1953 self.dispatch_2op(
1954 d,
1955 s1,
1956 InstCost {
1957 latency: 3,
1958 decode_slots: 4,
1959 alu_slots: 1,
1960 ..EMPTY_COST
1961 },
1962 )
1963 }
1964
    /// Visitor hook for `move_reg`: forwards `dst` and `src` to
    /// `dispatch_move_reg_avx2`. `_offset` and `_args_length` are unused.
    #[inline(always)]
    fn move_reg(&mut self, _offset: u32, _args_length: u32, dst: RawReg, src: RawReg) -> Self::ReturnTy {
        self.dispatch_move_reg_avx2(dst, src);
    }
1969
1970 #[inline(always)]
1971 fn load_imm(&mut self, _offset: u32, _args_length: u32, dst: RawReg, _value: u32) -> Self::ReturnTy {
1972 self.dispatch_1op_dst(
1973 dst,
1974 InstCost {
1975 latency: 1,
1976 decode_slots: 1,
1977 ..EMPTY_COST
1978 },
1979 )
1980 }
1981
1982 #[inline(always)]
1983 fn load_imm64(&mut self, _offset: u32, _args_length: u32, dst: RawReg, _value: u64) -> Self::ReturnTy {
1984 self.dispatch_1op_dst(
1985 dst,
1986 InstCost {
1987 latency: 1,
1988 decode_slots: 2,
1989 ..EMPTY_COST
1990 },
1991 );
1992 }
1993
1994 #[inline(always)]
1995 fn mul_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
1996 self.dispatch_3op(
1997 d,
1998 s1,
1999 s2,
2000 InstCost {
2001 latency: 4,
2002 decode_slots: 2 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())),
2003 alu_slots: 1,
2004 mul_slots: 1,
2005 ..EMPTY_COST
2006 },
2007 )
2008 }
2009
2010 #[inline(always)]
2011 fn mul_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
2012 self.dispatch_3op(
2013 d,
2014 s1,
2015 s2,
2016 InstCost {
2017 latency: 3,
2018 decode_slots: 1 + u32::from((d.get() != s1.get()) & (d.get() != s2.get())),
2019 alu_slots: 1,
2020 mul_slots: 1,
2021 ..EMPTY_COST
2022 },
2023 )
2024 }
2025
2026 #[inline(always)]
2027 fn mul_imm_32(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
2028 self.dispatch_2op(
2029 d,
2030 s1,
2031 InstCost {
2032 latency: 4,
2033 decode_slots: 2 + u32::from(d.get() != s1.get()),
2034 alu_slots: 1,
2035 mul_slots: 1,
2036 ..EMPTY_COST
2037 },
2038 )
2039 }
2040
2041 #[inline(always)]
2042 fn mul_imm_64(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, _s2: u32) -> Self::ReturnTy {
2043 self.dispatch_2op(
2044 d,
2045 s1,
2046 InstCost {
2047 latency: 3,
2048 decode_slots: 1 + u32::from(d.get() != s1.get()),
2049 alu_slots: 1,
2050 mul_slots: 1,
2051 ..EMPTY_COST
2052 },
2053 )
2054 }
2055
2056 #[inline(always)]
2057 fn mul_upper_signed_signed(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
2058 self.dispatch_3op(
2059 d,
2060 s1,
2061 s2,
2062 InstCost {
2063 latency: 4,
2064 decode_slots: 4,
2065 alu_slots: 1,
2066 mul_slots: 1,
2067 ..EMPTY_COST
2068 },
2069 )
2070 }
2071
2072 #[inline(always)]
2073 fn mul_upper_unsigned_unsigned(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
2074 self.dispatch_3op(
2075 d,
2076 s1,
2077 s2,
2078 InstCost {
2079 latency: 4,
2080 decode_slots: 4,
2081 alu_slots: 1,
2082 mul_slots: 1,
2083 ..EMPTY_COST
2084 },
2085 )
2086 }
2087
2088 #[inline(always)]
2089 fn mul_upper_signed_unsigned(&mut self, _offset: u32, _args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy {
2090 self.dispatch_3op(
2091 d,
2092 s1,
2093 s2,
2094 InstCost {
2095 latency: 6,
2096 decode_slots: 4,
2097 alu_slots: 1,
2098 mul_slots: 1,
2099 ..EMPTY_COST
2100 },
2101 )
2102 }
2103
2104 #[cold]
2107 fn invalid(&mut self, _offset: u32, _args_length: u32) -> Self::ReturnTy {
2108 self.dispatch_finish(2);
2109 }
2110
2111 #[inline(always)]
2112 fn trap(&mut self, _offset: u32, _args_length: u32) -> Self::ReturnTy {
2113 self.dispatch_finish(2);
2114 }
2115
2116 #[inline(always)]
2117 fn fallthrough(&mut self, _offset: u32, _args_length: u32) -> Self::ReturnTy {
2118 self.dispatch_finish(2);
2119 }
2120
2121 #[inline(always)]
2122 fn unlikely(&mut self, _offset: u32, _args_length: u32) -> Self::ReturnTy {
2123 self.dispatch_generic(
2124 None,
2125 None,
2126 None,
2127 InstCost {
2128 latency: 40,
2129 decode_slots: 1,
2130 ..EMPTY_COST
2131 },
2132 );
2133 }
2134
2135 #[inline(always)]
2136 fn jump(&mut self, _offset: u32, _args_length: u32, _target: u32) -> Self::ReturnTy {
2137 self.dispatch_finish(15);
2138 }
2139
2140 #[inline(always)]
2141 fn load_imm_and_jump(&mut self, _offset: u32, _args_length: u32, _ra: RawReg, _value: u32, _target: u32) -> Self::ReturnTy {
2142 self.dispatch_finish(15);
2143 }
2144
2145 #[inline(always)]
2146 fn jump_indirect(&mut self, _offset: u32, _args_length: u32, base: RawReg, _base_offset: u32) -> Self::ReturnTy {
2147 self.dispatch_generic(
2148 None,
2149 Some(base),
2150 None,
2151 InstCost {
2152 latency: 22,
2153 decode_slots: 1,
2154 ..EMPTY_COST
2155 },
2156 );
2157 self.wait_until_empty();
2158 self.finished = true;
2159 }
2160
2161 #[inline(always)]
2162 fn load_imm_and_jump_indirect(
2163 &mut self,
2164 _offset: u32,
2165 _args_length: u32,
2166 _ra: RawReg,
2167 base: RawReg,
2168 _value: u32,
2169 _base_offset: u32,
2170 ) -> Self::ReturnTy {
2171 self.dispatch_generic(
2172 None,
2173 Some(base),
2174 None,
2175 InstCost {
2176 latency: 22,
2177 decode_slots: 1,
2178 ..EMPTY_COST
2179 },
2180 );
2181 self.wait_until_empty();
2182 self.finished = true;
2183 }
2184
2185 #[inline(always)]
2188 fn ecalli(&mut self, _offset: u32, _args_length: u32, _imm: u32) -> Self::ReturnTy {
2189 self.dispatch_generic(
2190 None,
2191 None,
2192 None,
2193 InstCost {
2194 latency: 100,
2195 decode_slots: 4,
2196 alu_slots: 1,
2197 ..EMPTY_COST
2198 },
2199 );
2200 }
2201
2202 #[inline(always)]
2203 fn sbrk(&mut self, _offset: u32, _args_length: u32, dst: RawReg, src: RawReg) -> Self::ReturnTy {
2204 self.dispatch_2op(
2206 dst,
2207 src,
2208 InstCost {
2209 latency: 100,
2210 decode_slots: 4,
2211 alu_slots: 1,
2212 ..EMPTY_COST
2213 },
2214 );
2215 }
2216
2217 #[inline(always)]
2218 fn memset(&mut self, _offset: u32, _args_length: u32) -> Self::ReturnTy {
2219 self.dispatch_generic(
2221 None,
2222 None,
2223 None,
2224 InstCost {
2225 latency: 100,
2226 decode_slots: 4,
2227 alu_slots: 1,
2228 ..EMPTY_COST
2229 },
2230 )
2231 }
2232}
2233
2234#[derive(Clone)]
2235#[non_exhaustive]
2236pub struct TimelineConfig<'a> {
2237 pub should_enable_fast_forward: bool,
2238 pub instruction_format: InstructionFormat<'a>,
2239}
2240
2241impl<'a> Default for TimelineConfig<'a> {
2242 fn default() -> Self {
2243 TimelineConfig {
2244 should_enable_fast_forward: false,
2245 instruction_format: InstructionFormat {
2246 is_64_bit: true,
2247 ..InstructionFormat::default()
2248 },
2249 }
2250 }
2251}
2252
2253pub fn timeline_for_instructions(
2254 code: &[u8],
2255 isa: InstructionSetKind,
2256 cache_model: CacheModel,
2257 instructions: &[crate::program::ParsedInstruction],
2258 config: TimelineConfig,
2259) -> (String, u32) {
2260 use alloc::collections::BTreeMap;
2261
2262 struct TimelineTracer<'a> {
2263 should_enable_fast_forward: bool,
2264 timeline: &'a mut BTreeMap<(u32, u32), EventKind>,
2265 }
2266
2267 impl<'a> Tracer for TimelineTracer<'a> {
2268 const SHOULD_CALL_ON_EVENT: bool = true;
2269
2270 fn should_enable_fast_forward(&self) -> bool {
2271 self.should_enable_fast_forward
2272 }
2273
2274 fn on_event(&mut self, cycle: u32, instruction: u32, event: EventKind) {
2275 match self.timeline.entry((cycle, instruction)) {
2276 alloc::collections::btree_map::Entry::Vacant(entry) => {
2277 #[cfg(all(test, feature = "logging"))]
2278 log::debug!(
2279 "on_event[{cycle}]: instruction={instruction} '{}' (event={event:?})",
2280 char::from(event)
2281 );
2282 entry.insert(event);
2283 }
2284 alloc::collections::btree_map::Entry::Occupied(entry) => {
2285 panic!(
2286 "duplicate timeline update: cycle={cycle} instruction={instruction} old_event={:?} new_event={event:?}",
2287 entry.get()
2288 );
2289 }
2290 }
2291 }
2292 }
2293
2294 let count = instructions
2295 .iter()
2296 .take_while(|inst| !inst.kind.opcode().starts_new_basic_block())
2297 .count();
2298
2299 let mut instructions = instructions[..(count + 1).min(instructions.len())].to_vec();
2300 if !instructions
2301 .last()
2302 .map(|instruction| instruction.kind.opcode().starts_new_basic_block())
2303 .unwrap_or(false)
2304 {
2305 let next_pc = instructions.last().map(|instruction| instruction.next_offset.0).unwrap_or(0);
2306 instructions.push(crate::program::ParsedInstruction {
2307 kind: crate::program::Instruction::invalid,
2308 offset: crate::program::ProgramCounter(next_pc),
2309 next_offset: crate::program::ProgramCounter(next_pc + 1),
2310 });
2311 }
2312
2313 let mut timeline_map = BTreeMap::new();
2314 let mut sim = Simulator::<B64, _>::new(
2315 code,
2316 isa,
2317 cache_model,
2318 TimelineTracer {
2319 should_enable_fast_forward: config.should_enable_fast_forward,
2320 timeline: &mut timeline_map,
2321 },
2322 );
2323
2324 for &instruction in &instructions {
2325 assert!(sim.take_block_cost().is_none());
2326 instruction.visit_parsing(&mut sim);
2327 }
2328
2329 let total_cycles = cast(sim.cycles).to_usize();
2330 let block_cost = sim.take_block_cost().unwrap();
2331 #[cfg(all(test, feature = "logging"))]
2332 log::debug!("Total cycles: {total_cycles}");
2333
2334 #[cfg(all(test, feature = "logging"))]
2335 log::debug!("Block cost: {block_cost}");
2336
2337 let mut timeline = vec!['.'; total_cycles * instructions.len()];
2338 for ((cycle, instruction), event) in timeline_map {
2339 let index = instruction as usize * total_cycles + cycle as usize;
2340 timeline[index] = char::from(event);
2341 }
2342
2343 let mut timeline_s = String::new();
2344 for (nth_instruction, instruction) in instructions.iter().enumerate() {
2345 use core::fmt::Write;
2346
2347 let line = &timeline[nth_instruction * total_cycles..(nth_instruction + 1) * total_cycles];
2348 timeline_s.extend(line.iter().copied());
2349 timeline_s.push_str(" ");
2350 writeln!(&mut timeline_s, "{}", instruction.display(&config.instruction_format)).unwrap();
2351 }
2352
2353 if config.should_enable_fast_forward {
2354 let mut timeline_new = String::with_capacity(timeline_s.len());
2355 let mut is_in_cycles = true;
2356 let mut last = '.';
2357 for mut ch in timeline_s.chars() {
2358 if ch == ' ' {
2359 is_in_cycles = false;
2360 } else if ch == '\n' {
2361 is_in_cycles = true;
2362 last = '.';
2363 } else if ch == '.' {
2364 if last != 'R' && last != 'D' && is_in_cycles {
2365 ch = last;
2366 }
2367 } else {
2368 last = ch;
2369 }
2370 timeline_new.push(ch);
2371 }
2372 timeline_s = timeline_new;
2373 }
2374
2375 (timeline_s, block_cost)
2376}
2377
2378pub fn trap_cost(isa: InstructionSetKind, cache_model: CacheModel) -> u32 {
2379 let mut sim = Simulator::<B64, _>::new(&[], isa, cache_model, ());
2380 crate::program::ParsedInstruction {
2381 kind: crate::program::Instruction::trap,
2382 offset: crate::program::ProgramCounter(0),
2383 next_offset: crate::program::ProgramCounter(0),
2384 }
2385 .visit_parsing(&mut sim);
2386 sim.take_block_cost().unwrap()
2387}
2388
2389#[cfg(test)]
2390mod tests {
2391 use alloc::string::String;
2392 use alloc::vec::Vec;
2393
2394 use super::{timeline_for_instructions, CacheModel, TimelineConfig};
2395 use crate::assembler::assemble;
2396 use crate::program::{InstructionSetKind, ProgramBlob};
2397
2398 #[cfg(test)]
2399 fn test_config() -> CacheModel {
2400 CacheModel::L1Hit
2401 }
2402
2403 #[cfg(test)]
2404 fn assert_timeline(config: CacheModel, program: &str, expected_timeline: &str) {
2405 use crate::cast::cast;
2406
2407 let _ = env_logger::try_init();
2408
2409 let program = assemble(Some(InstructionSetKind::Latest64), program).unwrap();
2410 let blob = ProgramBlob::parse(program.into()).unwrap();
2411 let instructions: Vec<_> = blob.instructions().collect();
2412
2413 let (timeline_s, cycles) = timeline_for_instructions(
2414 blob.code(),
2415 InstructionSetKind::Latest64,
2416 config,
2417 &instructions,
2418 TimelineConfig::default(),
2419 );
2420 let mut expected_timeline_s = String::new();
2421 let mut expected_cycles = 0;
2422 for line in expected_timeline.lines() {
2423 let line = line.trim();
2424 if line.is_empty() {
2425 continue;
2426 }
2427 expected_timeline_s.push_str(line);
2428 expected_timeline_s.push('\n');
2429
2430 expected_cycles = expected_cycles.max(line.split(" ").next().unwrap().len() as u32);
2431 }
2432
2433 if timeline_s != expected_timeline_s {
2434 panic!("Timeline mismatch!\n\nExpected timeline:\n{expected_timeline_s}\nActual timeline:\n{timeline_s}");
2435 }
2436
2437 let expected_cycles = cast(expected_cycles).to_signed() - 3;
2438 assert_eq!(cast(cycles).to_signed(), expected_cycles);
2439
2440 #[cfg(feature = "logging")]
2441 log::debug!("Rerunning with fast-forward enabled...");
2442
2443 let timeline_config = TimelineConfig {
2444 should_enable_fast_forward: true,
2445 ..TimelineConfig::default()
2446 };
2447 let (timeline_ff_s, cycles_ff) =
2448 timeline_for_instructions(blob.code(), InstructionSetKind::Latest64, config, &instructions, timeline_config);
2449 assert_eq!(cycles_ff, cycles);
2450 if timeline_ff_s != expected_timeline_s {
2451 panic!("Timeline mismatch for fast-forward!\n\nExpected timeline:\n{expected_timeline_s}\nActual timeline:\n{timeline_ff_s}");
2452 }
2453 }
2454
/// Two additions without a dependency between them share the same timeline.
#[test]
fn test_parallel_simple() {
    let program = "
        a0 = a1 + a2
        a1 = a1 + a2
        trap
    ";
    let expected = "
        DeER. a0 = a1 + a2
        DeER. a1 = a1 + a2
        DeeER trap
    ";
    assert_timeline(test_config(), program, expected);
}
2471
/// The second addition reads the result of the first one and waits a cycle for it.
#[test]
fn test_sequential_simple() {
    let program = "
        a0 = a1 + a2
        a1 = a0 + a2
        trap
    ";
    let expected = "
        DeER.. a0 = a1 + a2
        D=eER. a1 = a0 + a2
        .DeeER trap
    ";
    assert_timeline(test_config(), program, expected);
}
2488
/// Four 64-bit immediate loads are decoded two per cycle.
#[test]
fn test_sequential_decode_limits() {
    let program = "
        a0 = 0x12345678aabbccdd
        a1 = 0x12345678aabbccdd
        a2 = 0x12345678aabbccdd
        a3 = 0x12345678aabbccdd
        trap
    ";
    let expected = "
        DeER... a0 = 0x12345678aabbccdd
        DeER... a1 = 0x12345678aabbccdd
        .DeER.. a2 = 0x12345678aabbccdd
        .DeER.. a3 = 0x12345678aabbccdd
        ..DeeER trap
    ";
    assert_timeline(test_config(), program, expected);
}
2509
/// Two independent multiplications execute one after the other.
#[test]
fn test_resource_limits_mul() {
    let program = "
        a0 = a1 * a2
        a1 = a3 * a4
        trap
    ";
    let expected = "
        DeeeER... a0 = a1 * a2
        D===eeeER a1 = a3 * a4
        .DeeE---R trap
    ";
    assert_timeline(test_config(), program, expected);
}
2526
/// A multiplication that consumes the result of a preceding addition.
#[test]
fn test_mul_with_dep() {
    let program = "
        a0 = a1 + a2
        a4 = a0 * a3
        trap
    ";
    let expected = "
        DeER... a0 = a1 + a2
        D=eeeER a4 = a0 * a3
        .DeeE-R trap
    ";
    assert_timeline(test_config(), program, expected);
}
2543
/// A register move only shows a decode event; its consumer still waits for the original producer.
#[test]
fn test_register_move() {
    let program = "
        s0 = 1
        a0 = s0
        a1 = a0 + 1
        trap
    ";
    let expected = "
        DeER.. s0 = 0x1
        D..... a0 = s0
        D=eER. a1 = a0 + 0x1
        .DeeER trap
    ";
    assert_timeline(test_config(), program, expected);
}
2562
/// A typical function epilogue: three loads, a stack adjustment and a `ret`.
#[test]
fn test_memory_accesses() {
    let program = "
        a0 = s1
        ra = u64 [sp + 0x30]
        s0 = u64 [sp + 0x28]
        s1 = u64 [sp + 0x20]
        sp = sp + 0x38
        ret
    ";
    let expected = "
        D............................ a0 = s1
        DeeeeER...................... ra = u64 [sp + 0x30]
        DeeeeER...................... s0 = u64 [sp + 0x28]
        DeeeeER...................... s1 = u64 [sp + 0x20]
        .DeE--R...................... sp = sp + 0x38
        .D===eeeeeeeeeeeeeeeeeeeeeeER ret
    ";
    assert_timeline(test_config(), program, expected);
}
2585
/// A block that consists of nothing but its terminator.
#[test]
fn test_empty() {
    let program = "
        fallthrough
    ";
    let expected = "
        DeeER fallthrough
    ";
    assert_timeline(test_config(), program, expected);
}
2598
/// The same register is written three times in a row before being used by an indirect jump.
#[test]
fn test_overwrite_register() {
    let program = "
        s0 = u64 [sp]
        s0 = a1 + a2
        s0 = u64 [s0]
        jump [s0]
    ";
    let expected = "
        DeeeeER....................... s0 = u64 [sp]
        DeE---R....................... s0 = a1 + a2
        D=eeeeER...................... s0 = u64 [s0]
        .D====eeeeeeeeeeeeeeeeeeeeeeER jump [s0]
    ";
    assert_timeline(test_config(), program, expected);
}
2617
/// A conditional branch that depends on a preceding load.
#[test]
fn test_load_and_jump() {
    let program = "
        @0:
        a2 = u8 [a0 + 11]
        jump @0 if a2 == 0
    ";
    let expected = "
        DeeeeER. a2 = u8 [a0 + 0xb]
        D====eER jump 0 if a2 == 0
    ";
    assert_timeline(test_config(), program, expected);
}
2633
/// A mix of loads, ALU operations with dependencies and a store, terminated by a `trap`.
#[test]
fn test_complex() {
    let program = "
        a2 = i16 [a0 + 0x6]
        a1 = a1 & 0x7
        a3 = 0x1
        a1 = a1 << 0x8
        a2 = a2 & 0xfffffffffffff8ff
        a1 = a1 | a2
        a2 = a1 + a3
        u8 [a0 + 0x2] = a3
        trap
    ";
    let expected = "
        DeeeeER....................... a2 = i16 [a0 + 0x6]
        DeE---R....................... a1 = a1 & 0x7
        DeE---R....................... a3 = 0x1
        D=eE--R....................... a1 = a1 << 0x8
        .D===eER...................... a2 = a2 & 0xfffffffffffff8ff
        .D====eER..................... a1 = a1 | a2
        .D=====eER.................... a2 = a1 + a3
        ..DeeeeeeeeeeeeeeeeeeeeeeeeeER u8 [a0 + 0x2] = a3
        ..DeeE-----------------------R trap
    ";
    assert_timeline(test_config(), program, expected);
}
2662
/// A longer chain of dependent 32-bit and 64-bit ALU operations ending in a direct jump.
#[test]
fn test_even_more_complex() {
    let program = "
        @0:
        i32 a1 = clz a0
        i32 a0 = a0 << a1
        a1 = a1 << 0x17
        i32 a2 = a0 >> 0x8
        a3 = a0 >> 0x7
        a3 = a3 & ~a2
        i32 a2 = a2 - a1
        a0 = a0 << 0x18
        a3 = a3 & 0x1
        i32 a0 = a0 - a3
        i32 a0 = a0 >> 0x1f
        a1 = a2 + 0x4e800000
        i32 a0 = a0 + a1
        a1 = 0x46008c00
        ra = 0x24
        jump @0
    ";
    let expected = "
        DeER..................... i32 a1 = clz a0
        D=eeER................... i32 a0 = a0 << a1
        .DeE-R................... a1 = a1 << 0x17
        .D==eeER................. i32 a2 = a0 >> 0x8
        ..D=eE-R................. a3 = a0 >> 0x7
        ...D==eeER............... a3 = a3 & ~a2
        ....D=eeER............... i32 a2 = a2 - a1
        ....DeE--R............... a0 = a0 << 0x18
        ....D===eER.............. a3 = a3 & 0x1
        .....D===eeER............ i32 a0 = a0 - a3
        .....D=====eeER.......... i32 a0 = a0 >> 0x1f
        ......D=eE----R.......... a1 = a2 + 0x4e800000
        ......D======eeER........ i32 a0 = a0 + a1
        .......DeE------R........ a1 = 0x46008c00
        .......DeE------R........ ra = 0x24
        .......DeeeeeeeeeeeeeeeER jump 0
    ";
    assert_timeline(test_config(), program, expected);
}
2706
/// A long block starting with `unlikely`, simulated with every memory access hitting L1.
#[test]
fn test_super_complex_l1() {
    let cache_model = CacheModel::L1Hit;
    let program = "
        @0:
        unlikely
        t1 = u8 [s0]
        a1 = u8 [s0 + 0x11]
        a2 = 0x172d0
        a3 = u8 [s0 + 0x16]
        t0 = sp + 0x58
        a1 = a1 << 0x3
        a1 = a1 + a2
        a2 = u8 [a1]
        a5 = u8 [a1 + 0x1]
        s1 = u8 [a1 + 0x2]
        a4 = u8 [a1 + 0x3]
        a3 = a3 + t0
        a5 = a5 << 0x8
        s1 = s1 << 0x10
        a4 = a4 << 0x18
        a2 = a2 | a5
        a5 = u8 [a1 + 0x4]
        a0 = u8 [a1 + 0x5]
        a4 = a4 | s1
        s1 = u8 [a1 + 0x6]
        a1 = u8 [a1 + 0x7]
        a0 = a0 << 0x8
        a0 = a0 | a5
        s1 = s1 << 0x10
        a1 = a1 << 0x18
        a1 = a1 | s1
        a2 = a2 | a4
        a0 = a0 | a1
        a1 = s0 - t1
        a0 = a0 << 0x20
        a0 = a0 | a2
        u64 [sp + 0x58] = a0
        a0 = u8 [a3]
        a1 = u8 [a1 + 0x4]
        a0 = a1 * a0
        a1 = u8 [s0 + 0x23]
        jump @0 if a1 != 0
    ";
    let expected = "
        DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER............................ unlikely
        DeeeeE------------------------------------R............................ t1 = u8 [s0]
        DeeeeE------------------------------------R............................ a1 = u8 [s0 + 0x11]
        DeE---------------------------------------R............................ a2 = 0x172d0
        .DeeeeE-----------------------------------R............................ a3 = u8 [s0 + 0x16]
        .DeE--------------------------------------R............................ t0 = sp + 0x58
        .D===eE-----------------------------------R............................ a1 = a1 << 0x3
        ..D===eE----------------------------------R............................ a1 = a1 + a2
        ..D====eeeeE------------------------------R............................ a2 = u8 [a1]
        ..D====eeeeE------------------------------R............................ a5 = u8 [a1 + 0x1]
        ..D====eeeeE------------------------------R............................ s1 = u8 [a1 + 0x2]
        ...D===eeeeE------------------------------R............................ a4 = u8 [a1 + 0x3]
        ...D==eE----------------------------------R............................ a3 = a3 + t0
        ...D=======eE-----------------------------R............................ a5 = a5 << 0x8
        ...D=======eE-----------------------------R............................ s1 = s1 << 0x10
        ....D======eE-----------------------------R............................ a4 = a4 << 0x18
        ....D=======eE----------------------------R............................ a2 = a2 | a5
        ....D======eeeeE--------------------------R............................ a5 = u8 [a1 + 0x4]
        ....D=======eeeeE-------------------------R............................ a0 = u8 [a1 + 0x5]
        .....D======eE----------------------------R............................ a4 = a4 | s1
        .....D=======eeeeE------------------------R............................ s1 = u8 [a1 + 0x6]
        .....D=======eeeeE------------------------R............................ a1 = u8 [a1 + 0x7]
        .....D==========eE------------------------R............................ a0 = a0 << 0x8
        ......D==========eE-----------------------R............................ a0 = a0 | a5
        ......D==========eE-----------------------R............................ s1 = s1 << 0x10
        ......D==========eE-----------------------R............................ a1 = a1 << 0x18
        ......D===========eE----------------------R............................ a1 = a1 | s1
        .......D=======eE-------------------------R............................ a2 = a2 | a4
        .......D===========eE---------------------R............................ a0 = a0 | a1
        .......D========eE------------------------R............................ a1 = s0 - t1
        ........D===========eE--------------------R............................ a0 = a0 << 0x20
        ........D============eE-------------------R............................ a0 = a0 | a2
        ...........................................DeeeeeeeeeeeeeeeeeeeeeeeeeER u64 [sp + 0x58] = a0
        ...........................................DeeeeE---------------------R a0 = u8 [a3]
        ...........................................DeeeeE---------------------R a1 = u8 [a1 + 0x4]
        ...........................................D====eeeE------------------R a0 = a1 * a0
        ............................................DeeeeE--------------------R a1 = u8 [s0 + 0x23]
        ............................................D====eE-------------------R jump 0 if a1 != 0
    ";
    assert_timeline(cache_model, program, expected);
}
2794
/// The same long block as the L1 variant, simulated with every memory access hitting L2.
#[test]
fn test_super_complex_l2() {
    let cache_model = CacheModel::L2Hit;
    let program = "
        @0:
        unlikely
        t1 = u8 [s0]
        a1 = u8 [s0 + 0x11]
        a2 = 0x172d0
        a3 = u8 [s0 + 0x16]
        t0 = sp + 0x58
        a1 = a1 << 0x3
        a1 = a1 + a2
        a2 = u8 [a1]
        a5 = u8 [a1 + 0x1]
        s1 = u8 [a1 + 0x2]
        a4 = u8 [a1 + 0x3]
        a3 = a3 + t0
        a5 = a5 << 0x8
        s1 = s1 << 0x10
        a4 = a4 << 0x18
        a2 = a2 | a5
        a5 = u8 [a1 + 0x4]
        a0 = u8 [a1 + 0x5]
        a4 = a4 | s1
        s1 = u8 [a1 + 0x6]
        a1 = u8 [a1 + 0x7]
        a0 = a0 << 0x8
        a0 = a0 | a5
        s1 = s1 << 0x10
        a1 = a1 << 0x18
        a1 = a1 | s1
        a2 = a2 | a4
        a0 = a0 | a1
        a1 = s0 - t1
        a0 = a0 << 0x20
        a0 = a0 | a2
        u64 [sp + 0x58] = a0
        a0 = u8 [a3]
        a1 = u8 [a1 + 0x4]
        a0 = a1 * a0
        a1 = u8 [s0 + 0x23]
        jump @0 if a1 != 0
    ";
    let expected = "
        DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER..................................................................... unlikely
        DeeeeeeeeeeeeeeeeeeeeeeeeeE---------------R..................................................................... t1 = u8 [s0]
        DeeeeeeeeeeeeeeeeeeeeeeeeeE---------------R..................................................................... a1 = u8 [s0 + 0x11]
        DeE---------------------------------------R..................................................................... a2 = 0x172d0
        .DeeeeeeeeeeeeeeeeeeeeeeeeeE--------------R..................................................................... a3 = u8 [s0 + 0x16]
        .DeE--------------------------------------R..................................................................... t0 = sp + 0x58
        .D========================eE--------------R..................................................................... a1 = a1 << 0x3
        ..D========================eE-------------R..................................................................... a1 = a1 + a2
        ..D=========================eeeeeeeeeeeeeeeeeeeeeeeeeER......................................................... a2 = u8 [a1]
        ..D=========================eeeeeeeeeeeeeeeeeeeeeeeeeER......................................................... a5 = u8 [a1 + 0x1]
        ..D=========================eeeeeeeeeeeeeeeeeeeeeeeeeER......................................................... s1 = u8 [a1 + 0x2]
        ...D========================eeeeeeeeeeeeeeeeeeeeeeeeeER......................................................... a4 = u8 [a1 + 0x3]
        ...D=======================eE-------------------------R......................................................... a3 = a3 + t0
        ...D=================================================eER........................................................ a5 = a5 << 0x8
        ...D=================================================eER........................................................ s1 = s1 << 0x10
        ....D================================================eER........................................................ a4 = a4 << 0x18
        ....D=================================================eER....................................................... a2 = a2 | a5
        ....D================================================eeeeeeeeeeeeeeeeeeeeeeeeeER................................ a5 = u8 [a1 + 0x4]
        ....D=================================================eeeeeeeeeeeeeeeeeeeeeeeeeER............................... a0 = u8 [a1 + 0x5]
        .....D================================================eE------------------------R............................... a4 = a4 | s1
        .....D=================================================eeeeeeeeeeeeeeeeeeeeeeeeeER.............................. s1 = u8 [a1 + 0x6]
        .....D=================================================eeeeeeeeeeeeeeeeeeeeeeeeeER.............................. a1 = u8 [a1 + 0x7]
        .....D=========================================================================eER.............................. a0 = a0 << 0x8
        ......D=========================================================================eER............................. a0 = a0 | a5
        ......D=========================================================================eER............................. s1 = s1 << 0x10
        ......D=========================================================================eER............................. a1 = a1 << 0x18
        ......D==========================================================================eER............................ a1 = a1 | s1
        .......D======================================================================eE---R............................ a2 = a2 | a4
        .......D==========================================================================eER........................... a0 = a0 | a1
        .......D==================eE--------------------------------------------------------R........................... a1 = s0 - t1
        ........D==========================================================================eER.......................... a0 = a0 << 0x20
        ........D===========================================================================eER......................... a0 = a0 | a2
        ...........................................D=========================================eeeeeeeeeeeeeeeeeeeeeeeeeER u64 [sp + 0x58] = a0
        ...........................................D===================================eeeeeeeeeeeeeeeeeeeeeeeeeE------R a0 = u8 [a3]
        ...........................................D=====================================eeeeeeeeeeeeeeeeeeeeeeeeeE----R a1 = u8 [a1 + 0x4]
        ...........................................D==============================================================eeeE-R a0 = a1 * a0
        ............................................D====================================eeeeeeeeeeeeeeeeeeeeeeeeeE----R a1 = u8 [s0 + 0x23]
        ............................................D=============================================================eE---R jump 0 if a1 != 0
    ";
    assert_timeline(cache_model, program, expected);
}
2882
/// Four loads that each depend on the previous one, simulated with every access hitting L3.
#[test]
fn test_l3_loads() {
    let program = "
        a0 = u64 [a0]
        a0 = u64 [a0]
        a0 = u64 [a0]
        a0 = u64 [a0]
        ret
    ";
    let expected = "
        DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER............................................................................................................... a0 = u64 [a0]
        D=====================================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER.......................................................................... a0 = u64 [a0]
        D==========================================================================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER..................................... a0 = u64 [a0]
        D===============================================================================================================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER a0 = u64 [a0]
        .DeeeeeeeeeeeeeeeeeeeeeeE-----------------------------------------------------------------------------------------------------------------------------R ret
    ";
    assert_timeline(CacheModel::L3Hit, program, expected)
}
2902
/// A host call followed by a `ret`.
#[test]
fn test_ecalli() {
    let program = "
        ecalli 27
        ret
    ";
    let expected = "
        DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER ecalli 27
        .DeeeeeeeeeeeeeeeeeeeeeeE-----------------------------------------------------------------------------R ret
    ";
    assert_timeline(test_config(), program, expected);
}
2917
/// A shift whose shift amount comes from a preceding xor with an immediate.
#[test]
fn test_xor_and_shift() {
    let program = "
        a1 = a1 ^ 0xffffffffffffffff
        a1 = a0 >> a1
        fallthrough
    ";
    let expected = "
        DeER.. a1 = a1 ^ 0xffffffffffffffff
        D=eER. a1 = a0 >> a1
        .DeeER fallthrough
    ";
    assert_timeline(test_config(), program, expected)
}
2934
/// Four register moves are all decoded in the same cycle.
#[test]
fn test_move_reg_decode_slots() {
    let program = "
        s0 = a1
        a0 = a1
        a1 = t0
        a2 = s1
        trap
    ";
    let expected = "
        D..... s0 = a1
        D..... a0 = a1
        D..... a1 = t0
        D..... a2 = s1
        .DeeER trap
    ";
    assert_timeline(test_config(), program, expected)
}
2955}