1use crate::program::{Instruction, InstructionSetKind, RawReg, Reg};
2use crate::utils::{parse_imm, parse_immediate, parse_reg, parse_slice, ParsedImmediate};
3use alloc::borrow::ToOwned;
4use alloc::collections::BTreeMap;
5use alloc::format;
6use alloc::string::String;
7use alloc::vec::Vec;
8
/// Splits `text` around the first occurrence of `separator`.
///
/// Returns the text before and after the separator, each with surrounding
/// whitespace trimmed, or `None` when `separator` does not occur in `text`.
fn split<'a>(text: &'a str, separator: &str) -> Option<(&'a str, &'a str)> {
    text.split_once(separator)
        .map(|(head, tail)| (head.trim(), tail.trim()))
}
13
14fn parse_reg_or_imm(text: &str) -> Option<RegImm> {
15 if let Some(value) = parse_imm(text) {
16 Some(RegImm::Imm(value))
17 } else {
18 parse_reg(text).map(RegImm::Reg)
19 }
20}
21
22fn parse_absolute_memory_access(text: &str) -> Option<i32> {
23 let text = text.trim().strip_prefix('[')?.strip_suffix(']')?;
24 parse_imm(text)
25}
26
27fn parse_indirect_memory_access(text: &str) -> Option<(Reg, i32)> {
28 let text = text.trim().strip_prefix('[')?.strip_suffix(']')?;
29 if let Some(index) = text.find('+') {
30 let reg = parse_reg(text[..index].trim())?;
31 let offset = parse_imm(&text[index + 1..])?;
32 Some((reg, offset))
33 } else {
34 parse_reg(text).map(|reg| (reg, 0))
35 }
36}
37
38fn parse_load_imm_and_jump_indirect_with_tmp(line: &str) -> Option<(Reg, Reg, i32, i32)> {
41 let line = line.trim().strip_prefix("tmp")?;
42 if !line.starts_with('=') && line.trim_start() == line {
43 return None;
44 }
45 let line = line.trim().strip_prefix('=')?;
46
47 let index = line.find(',')?;
48 let base = parse_reg(line[..index].trim())?;
49 let line = line[index + 1..].trim();
50
51 let index = line.find('=')?;
52 let dst = parse_reg(line[..index].trim())?;
53 let line = line[index + 1..].trim();
54
55 let index = line.find(',')?;
56 let value = parse_imm(line[..index].trim())?;
57 let line = line[index + 1..].trim().strip_prefix("jump")?;
58 let text = line.trim().strip_prefix('[')?.strip_suffix(']')?;
59
60 if let Some(index) = text.find('+') {
61 if text[..index].trim() != "tmp" {
62 return None;
63 }
64 let offset = parse_imm(&text[index + 1..])?;
65 Some((dst, base, value, offset))
66 } else {
67 if text.trim() != "tmp" {
68 return None;
69 }
70 Some((dst, base, value, 0))
71 }
72}
73
/// Operand-width marker attached to the left-hand side of an assignment.
#[derive(Clone, Copy)]
pub enum OpMarker {
    /// The destination was written with an explicit `i32 ` prefix.
    I32,
    /// No width prefix was given.
    NONE,
}
79
/// Type selector of a memory load, taken from the prefix written in front of
/// the bracketed address (`i8`, `i16`, `i32`, `u8`, `u16`, `u32` or `u64`).
#[derive(Clone, Copy)]
pub enum LoadKind {
    /// `i8` load.
    I8,
    /// `i16` load.
    I16,
    /// `i32` load.
    I32,
    /// `u8` load.
    U8,
    /// `u16` load.
    U16,
    /// `u32` load.
    U32,
    /// `u64` load.
    U64,
}
90
/// Type selector of a memory store, taken from the prefix written in front of
/// the bracketed address (`u8`, `u16`, `u32` or `u64`).
#[derive(Clone, Copy)]
pub enum StoreKind {
    /// `u8` store.
    U8,
    /// `u16` store.
    U16,
    /// `u32` store.
    U32,
    /// `u64` store.
    U64,
}
98
/// Comparison operator of a textual condition such as `a0 <u a1`.
#[derive(Clone, Copy)]
enum ConditionKind {
    /// `==`
    Eq,
    /// `!=`
    NotEq,
    /// `<s`
    LessSigned,
    /// `<u`
    LessUnsigned,
    /// `<=s`
    LessOrEqualSigned,
    /// `<=u`
    LessOrEqualUnsigned,
    /// `>s`
    GreaterSigned,
    /// `>u`
    GreaterUnsigned,
    /// `>=s`
    GreaterOrEqualSigned,
    /// `>=u`
    GreaterOrEqualUnsigned,
}
112
113impl ConditionKind {
114 fn reverse_operands(self) -> Self {
115 match self {
116 Self::Eq => Self::Eq,
117 Self::NotEq => Self::NotEq,
118 Self::LessSigned => Self::GreaterSigned,
119 Self::LessUnsigned => Self::GreaterUnsigned,
120 Self::LessOrEqualSigned => Self::GreaterOrEqualSigned,
121 Self::LessOrEqualUnsigned => Self::GreaterOrEqualUnsigned,
122 Self::GreaterSigned => Self::LessSigned,
123 Self::GreaterUnsigned => Self::LessUnsigned,
124 Self::GreaterOrEqualSigned => Self::LessOrEqualSigned,
125 Self::GreaterOrEqualUnsigned => Self::LessOrEqualUnsigned,
126 }
127 }
128}
129
130#[derive(Copy, Clone)]
131enum RegImm {
132 Reg(Reg),
133 Imm(i32),
134}
135
136#[derive(Copy, Clone)]
137struct Condition {
138 kind: ConditionKind,
139 lhs: RegImm,
140 rhs: RegImm,
141}
142
143fn parse_condition(text: &str) -> Option<Condition> {
144 let text = text.trim();
145 let (lhs, text) = split(text, " ")?;
146 let lhs = parse_reg_or_imm(lhs)?;
147 let (kind, text) = split(text, " ")?;
148 let kind = match kind {
149 "<u" => ConditionKind::LessUnsigned,
150 "<s" => ConditionKind::LessSigned,
151 "<=u" => ConditionKind::LessOrEqualUnsigned,
152 "<=s" => ConditionKind::LessOrEqualSigned,
153 ">u" => ConditionKind::GreaterUnsigned,
154 ">s" => ConditionKind::GreaterSigned,
155 ">=u" => ConditionKind::GreaterOrEqualUnsigned,
156 ">=s" => ConditionKind::GreaterOrEqualSigned,
157 "==" => ConditionKind::Eq,
158 "!=" => ConditionKind::NotEq,
159 _ => return None,
160 };
161
162 let rhs = parse_reg_or_imm(text)?;
163 Some(Condition { kind, lhs, rhs })
164}
165
166pub fn assemble(mut isa: Option<InstructionSetKind>, code: &str) -> Result<Vec<u8>, String> {
167 enum MaybeInstruction {
168 Instruction(Instruction),
169 Jump(String),
170 Branch(String, ConditionKind, Reg, Reg),
171 BranchImm(String, ConditionKind, Reg, i32),
172 LoadLabelAddress(Reg, String),
173 LoadImmAndJump(Reg, u32, String),
174 }
175
176 impl MaybeInstruction {
177 fn starts_new_basic_block(&self) -> bool {
178 match self {
179 MaybeInstruction::Instruction(instruction) => instruction.starts_new_basic_block(),
180 MaybeInstruction::Jump(..)
181 | MaybeInstruction::Branch(..)
182 | MaybeInstruction::BranchImm(..)
183 | MaybeInstruction::LoadImmAndJump(..) => true,
184 MaybeInstruction::LoadLabelAddress(..) => false,
185 }
186 }
187 }
188
189 impl From<Instruction> for MaybeInstruction {
190 fn from(inst: Instruction) -> Self {
191 MaybeInstruction::Instruction(inst)
192 }
193 }
194
195 enum Export {
196 ByBlock(u32),
197 ByInstruction(u32),
198 }
199
200 let mut instructions: Vec<MaybeInstruction> = Vec::new();
201 let mut label_to_index = BTreeMap::new();
202 let mut at_block_start = true;
203 let mut current_basic_block = 0;
204 let mut exports = BTreeMap::new();
205 let mut ro_data = Vec::new();
206 let mut rw_data = Vec::new();
207 let mut ro_data_size = 0;
208 let mut rw_data_size = 0;
209 let mut stack_size = 0;
210
211 macro_rules! emit_and_continue {
212 ($instruction:expr) => {{
213 let instruction: MaybeInstruction = $instruction.into();
214 at_block_start = instruction.starts_new_basic_block();
215 if at_block_start {
216 current_basic_block += 1;
217 }
218
219 instructions.push(instruction);
220 continue;
221 }};
222 }
223
224 for (nth_line, line) in code.lines().enumerate() {
225 let nth_line = nth_line + 1; let line = line.trim();
227 let original_line = line;
228
229 if line.is_empty() || line.starts_with("//") {
230 continue;
231 }
232
233 if let Some(line) = line.strip_prefix("%ro_data_size = ") {
234 let line = line.trim();
235 let Ok(size) = line.parse::<u32>() else {
236 return Err(format!("cannot parse line {nth_line}"));
237 };
238 ro_data_size = size;
239 continue;
240 }
241
242 if let Some(line) = line.strip_prefix("%rw_data_size = ") {
243 let line = line.trim();
244 let Ok(size) = line.parse::<u32>() else {
245 return Err(format!("cannot parse line {nth_line}"));
246 };
247 rw_data_size = size;
248 continue;
249 }
250
251 if let Some(line) = line.strip_prefix("%stack_size = ") {
252 let line = line.trim();
253 let Ok(size) = line.parse::<u32>() else {
254 return Err(format!("cannot parse line {nth_line}"));
255 };
256 stack_size = size;
257 continue;
258 }
259
260 if let Some(line) = line.strip_prefix("%ro_data = ") {
261 let Some(value) = parse_slice(line) else {
262 return Err(format!("cannot parse line {nth_line}"));
263 };
264
265 ro_data = value;
266 continue;
267 }
268
269 if let Some(line) = line.strip_prefix("%rw_data = ") {
270 let Some(value) = parse_slice(line) else {
271 return Err(format!("cannot parse line {nth_line}"));
272 };
273
274 rw_data = value;
275 continue;
276 }
277
278 if let Some(line) = line.strip_prefix("%isa = ") {
279 isa = Some(match line.trim() {
280 "revive_v1" => InstructionSetKind::ReviveV1,
281 "latest32" => InstructionSetKind::Latest32,
282 "latest64" => InstructionSetKind::Latest64,
283 _ => return Err(format!("cannot parse line {nth_line}")),
284 });
285 continue;
286 }
287
288 if let Some((is_export, mut line)) = line
289 .strip_prefix("pub @")
290 .map(|line| (true, line))
291 .or_else(|| line.strip_prefix('@').map(|line| (false, line)))
292 {
293 let mut no_fallthrough = false;
294 if let Some(line_no_fallthrough) = line.strip_suffix("%no_fallthrough") {
295 no_fallthrough = true;
296 line = line_no_fallthrough.trim();
297 }
298
299 if let Some(label) = line.strip_suffix(':') {
300 if !at_block_start && !no_fallthrough {
301 instructions.push(Instruction::fallthrough.into());
302 at_block_start = true;
303 current_basic_block += 1;
304 }
305
306 if label_to_index.insert(label, current_basic_block).is_some() {
307 return Err(format!("duplicate label \"{label}\" on line {nth_line}"));
308 }
309
310 if is_export {
311 if at_block_start {
312 exports.insert(label, Export::ByBlock(current_basic_block));
313 } else {
314 exports.insert(label, Export::ByInstruction(instructions.len() as u32));
315 }
316 }
317
318 continue;
319 }
320 }
321
322 if line == "trap" {
323 emit_and_continue!(Instruction::trap);
324 }
325
326 if line == "fallthrough" {
327 emit_and_continue!(Instruction::fallthrough);
328 }
329
330 if line == "unlikely" {
331 emit_and_continue!(Instruction::unlikely);
332 }
333
334 if line == "ret" {
335 emit_and_continue!(Instruction::jump_indirect(Reg::RA.into(), 0));
336 }
337
338 if line == "nop" {
339 emit_and_continue!(Instruction::move_reg(Reg::RA.into(), Reg::RA.into()));
340 }
341
342 if let Some(line) = line.strip_prefix("ecalli ") {
343 let line = line.trim();
344 if let Ok(index) = line.parse::<u32>() {
345 emit_and_continue!(Instruction::ecalli(index));
346 }
347 }
348
349 if let Some(line) = line.strip_prefix("jump ") {
350 let line = line.trim();
351 if let Some(line) = line.strip_prefix('@') {
352 if let Some(index) = line.find(' ') {
353 let label = &line[..index];
354 let line = &line[index + 1..].trim();
355 let Some(line) = line.strip_prefix("if ") else {
356 return Err(format!("cannot parse line {nth_line}: \"{original_line}\""));
357 };
358
359 let line = line.trim();
360 let Some(condition) = parse_condition(line) else {
361 return Err(format!("cannot parse line {nth_line}: invalid condition"));
362 };
363
364 let (kind, lhs, rhs) = match (condition.lhs, condition.rhs) {
365 (RegImm::Reg(lhs), RegImm::Reg(rhs)) => {
366 emit_and_continue!(MaybeInstruction::Branch(label.to_owned(), condition.kind, lhs, rhs));
367 }
368 (RegImm::Reg(lhs), RegImm::Imm(rhs)) => (condition.kind, lhs, rhs),
369 (RegImm::Imm(lhs), RegImm::Reg(rhs)) => (condition.kind.reverse_operands(), rhs, lhs),
370 (RegImm::Imm(_), RegImm::Imm(_)) => {
371 return Err(format!("cannot parse line {nth_line}: both arguments cannot be immediates"));
372 }
373 };
374
375 emit_and_continue!(MaybeInstruction::BranchImm(label.to_owned(), kind, lhs, rhs));
376 }
377
378 emit_and_continue!(MaybeInstruction::Jump(line.to_owned()));
379 }
380
381 if let Some((base, offset)) = parse_indirect_memory_access(line) {
382 emit_and_continue!(Instruction::jump_indirect(base.into(), offset as u32));
383 }
384 }
385
386 if let Some((dst, base, value, offset)) = parse_load_imm_and_jump_indirect_with_tmp(line) {
387 emit_and_continue!(Instruction::load_imm_and_jump_indirect(
388 dst.into(),
389 base.into(),
390 value as u32,
391 offset as u32
392 ));
393 }
394
395 if let Some(index) = line.find('=') {
396 let lhs = line[..index].trim();
397 let rhs = line[index + 1..].trim();
398
399 let (op_marker, lhs) = if let Some(lhs) = lhs.strip_prefix("i32 ") {
400 (OpMarker::I32, lhs)
401 } else {
402 (OpMarker::NONE, lhs)
403 };
404
405 if let Some(dst) = parse_reg(lhs) {
406 if let Some(index) = rhs.find(',') {
407 if let Some(value) = parse_immediate(&rhs[..index]).and_then(|value| value.try_into().ok()) {
408 if let Some(line) = rhs[index + 1..].trim().strip_prefix("jump") {
409 if let Some(label) = line.trim().strip_prefix('@') {
410 emit_and_continue!(MaybeInstruction::LoadImmAndJump(dst, value, label.to_owned()));
411 }
412 if let Some((base, offset)) = parse_indirect_memory_access(line) {
413 let instruction = Instruction::load_imm_and_jump_indirect(dst.into(), base.into(), value, offset as u32);
414
415 if dst == base {
416 return Err(format!("cannot parse line {nth_line}, expected: \"{instruction}\""));
417 }
418
419 emit_and_continue!(instruction);
420 }
421 }
422 }
423 }
424
425 if let Some(index) = rhs.find("if ") {
426 if let Some(src) = parse_reg_or_imm(&rhs[..index]) {
427 if let Some(condition) = parse_condition(&rhs[index + 3..]) {
428 if let (RegImm::Reg(cond), RegImm::Imm(0)) = (condition.lhs, condition.rhs) {
429 let inst = match (src, condition.kind) {
430 (RegImm::Reg(src), ConditionKind::Eq) => {
431 Some(Instruction::cmov_if_zero(dst.into(), src.into(), cond.into()))
432 }
433 (RegImm::Reg(src), ConditionKind::NotEq) => {
434 Some(Instruction::cmov_if_zero(dst.into(), src.into(), cond.into()))
435 }
436 (RegImm::Imm(src), ConditionKind::Eq) => {
437 Some(Instruction::cmov_if_zero_imm(dst.into(), cond.into(), src as u32))
438 }
439 (RegImm::Imm(src), ConditionKind::NotEq) => {
440 Some(Instruction::cmov_if_zero_imm(dst.into(), cond.into(), src as u32))
441 }
442 _ => None,
443 };
444
445 if let Some(inst) = inst {
446 emit_and_continue!(inst);
447 }
448 }
449 }
450 }
451 }
452
453 if let Some((name, rhs)) = split(rhs, " ") {
454 if let Some(src) = parse_reg(rhs) {
455 type F = fn(RawReg, RawReg) -> Instruction;
456 let ctor = match (name, op_marker) {
457 ("cpop", OpMarker::I32) => Some(Instruction::count_set_bits_32 as F),
458 ("cpop", OpMarker::NONE) => Some(Instruction::count_set_bits_64 as F),
459 ("clz", OpMarker::I32) => Some(Instruction::count_leading_zero_bits_32 as F),
460 ("clz", OpMarker::NONE) => Some(Instruction::count_leading_zero_bits_64 as F),
461 ("ctz", OpMarker::I32) => Some(Instruction::count_trailing_zero_bits_32 as F),
462 ("ctz", OpMarker::NONE) => Some(Instruction::count_trailing_zero_bits_64 as F),
463 ("sext8", _) => Some(Instruction::sign_extend_8 as F),
464 ("sext16", _) => Some(Instruction::sign_extend_16 as F),
465 ("zext16", _) => Some(Instruction::zero_extend_16 as F),
466 ("reverse", _) => Some(Instruction::reverse_byte as F),
467 _ => None,
468 };
469
470 if let Some(ctor) = ctor {
471 emit_and_continue!(ctor(dst.into(), src.into()));
472 }
473 }
474 }
475
476 if let Some(src) = parse_reg(rhs) {
477 emit_and_continue!(Instruction::move_reg(dst.into(), src.into()));
478 }
479
480 if let Some(instr) = parse_immediate(rhs) {
481 match instr {
482 ParsedImmediate::U32(value) => {
483 emit_and_continue!(Instruction::load_imm(dst.into(), value));
484 }
485 ParsedImmediate::U64(value) => {
486 emit_and_continue!(Instruction::load_imm64(dst.into(), value));
487 }
488 }
489 }
490
491 if let Some(label) = rhs.strip_prefix('@') {
492 emit_and_continue!(MaybeInstruction::LoadLabelAddress(dst, label.to_owned()));
493 }
494
495 if let Some(rhs) = rhs.strip_prefix("~(") {
496 if let Some(rhs) = rhs.strip_suffix(')') {
497 if let Some((src1, src2)) = split(rhs.trim(), "^") {
498 if let Some(src1) = parse_reg(src1) {
499 if let Some(src2) = parse_reg(src2) {
500 let dst = dst.into();
501 let src1 = src1.into();
502 let src2 = src2.into();
503 emit_and_continue!(Instruction::xnor(dst, src1, src2));
504 }
505 }
506 }
507 }
508 }
509
510 enum Op {
511 Add,
512 Sub,
513 And,
514 Xor,
515 Or,
516 Mul,
517 DivUnsigned,
518 DivSigned,
519 RemUnsigned,
520 RemSigned,
521 LessUnsigned,
522 LessSigned,
523 GreaterUnsigned,
524 GreaterSigned,
525 ShiftLeft,
526 ShiftRight,
527 ShiftArithmeticRight,
528 RotateLeft,
529 RotateRight,
530 AndInverted,
531 OrInverted,
532 }
533
534 #[allow(clippy::manual_map)]
535 let operation = if let Some(index) = rhs.find('+') {
536 Some((index, 1, Op::Add))
537 } else if let Some(index) = rhs.find("& ~") {
538 Some((index, 3, Op::AndInverted))
539 } else if let Some(index) = rhs.find('&') {
540 Some((index, 1, Op::And))
541 } else if let Some(index) = rhs.find("| ~") {
542 Some((index, 3, Op::OrInverted))
543 } else if let Some(index) = rhs.find('|') {
544 Some((index, 1, Op::Or))
545 } else if let Some(index) = rhs.find('^') {
546 Some((index, 1, Op::Xor))
547 } else if let Some(index) = rhs.find('*') {
548 Some((index, 1, Op::Mul))
549 } else if let Some(index) = rhs.find("/u") {
550 Some((index, 2, Op::DivUnsigned))
551 } else if let Some(index) = rhs.find("/s") {
552 Some((index, 2, Op::DivSigned))
553 } else if let Some(index) = rhs.find("%u") {
554 Some((index, 2, Op::RemUnsigned))
555 } else if let Some(index) = rhs.find("%s") {
556 Some((index, 2, Op::RemSigned))
557 } else if let Some(index) = rhs.find(">>a") {
558 Some((index, 3, Op::ShiftArithmeticRight))
559 } else if let Some(index) = rhs.find(">>r") {
560 Some((index, 3, Op::RotateRight))
561 } else if let Some(index) = rhs.find("<<r") {
562 Some((index, 3, Op::RotateLeft))
563 } else if let Some(index) = rhs.find("<<") {
564 Some((index, 2, Op::ShiftLeft))
565 } else if let Some(index) = rhs.find(">>") {
566 Some((index, 2, Op::ShiftRight))
567 } else if let Some(index) = rhs.find("<u") {
568 Some((index, 2, Op::LessUnsigned))
569 } else if let Some(index) = rhs.find("<s") {
570 Some((index, 2, Op::LessSigned))
571 } else if let Some(index) = rhs.find(">u") {
572 Some((index, 2, Op::GreaterUnsigned))
573 } else if let Some(index) = rhs.find(">s") {
574 Some((index, 2, Op::GreaterSigned))
575 } else if let Some(index) = rhs.find('-') {
576 Some((index, 1, Op::Sub))
578 } else {
579 None
580 };
581
582 if let Some((index, op_len, op)) = operation {
583 let src1 = rhs[..index].trim();
584 let src2 = rhs[index + op_len..].trim();
585
586 if let Some(src1) = parse_reg(src1) {
587 if let Some(src2) = parse_reg(src2) {
588 let dst = dst.into();
589 let src1 = src1.into();
590 let src2 = src2.into();
591 match op_marker {
592 OpMarker::I32 => {
593 emit_and_continue!(match op {
594 Op::Add => Instruction::add_32(dst, src1, src2),
595 Op::Sub => Instruction::sub_32(dst, src1, src2),
596 Op::And => {
597 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
598 }
599 Op::Xor => {
600 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
601 }
602 Op::Or => {
603 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
604 }
605 Op::Mul => Instruction::mul_32(dst, src1, src2),
606 Op::DivUnsigned => Instruction::div_unsigned_32(dst, src1, src2),
607 Op::DivSigned => Instruction::div_signed_32(dst, src1, src2),
608 Op::RemUnsigned => Instruction::rem_unsigned_32(dst, src1, src2),
609 Op::RemSigned => Instruction::rem_signed_32(dst, src1, src2),
610 Op::LessUnsigned => {
611 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
612 }
613 Op::LessSigned => {
614 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
615 }
616 Op::GreaterUnsigned => {
617 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
618 }
619 Op::GreaterSigned => {
620 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
621 }
622 Op::ShiftLeft => Instruction::shift_logical_left_32(dst, src1, src2),
623 Op::ShiftRight => Instruction::shift_logical_right_32(dst, src1, src2),
624 Op::ShiftArithmeticRight => Instruction::shift_arithmetic_right_32(dst, src1, src2),
625 Op::RotateLeft => Instruction::rotate_left_32(dst, src1, src2),
626 Op::RotateRight => Instruction::rotate_right_32(dst, src1, src2),
627 Op::AndInverted => {
628 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
629 }
630 Op::OrInverted => {
631 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
632 }
633 });
634 }
635 OpMarker::NONE => {
636 emit_and_continue!(match op {
637 Op::Add => Instruction::add_64(dst, src1, src2),
638 Op::Sub => Instruction::sub_64(dst, src1, src2),
639 Op::And => Instruction::and(dst, src1, src2),
640 Op::Xor => Instruction::xor(dst, src1, src2),
641 Op::Or => Instruction::or(dst, src1, src2),
642 Op::Mul => Instruction::mul_64(dst, src1, src2),
643 Op::DivUnsigned => Instruction::div_unsigned_64(dst, src1, src2),
644 Op::DivSigned => Instruction::div_signed_64(dst, src1, src2),
645 Op::RemUnsigned => Instruction::rem_unsigned_64(dst, src1, src2),
646 Op::RemSigned => Instruction::rem_signed_64(dst, src1, src2),
647 Op::LessUnsigned => Instruction::set_less_than_unsigned(dst, src1, src2),
648 Op::LessSigned => Instruction::set_less_than_signed(dst, src1, src2),
649 Op::GreaterUnsigned => Instruction::set_less_than_unsigned(dst, src2, src1),
650 Op::GreaterSigned => Instruction::set_less_than_signed(dst, src2, src1),
651 Op::ShiftLeft => Instruction::shift_logical_left_64(dst, src1, src2),
652 Op::ShiftRight => Instruction::shift_logical_right_64(dst, src1, src2),
653 Op::ShiftArithmeticRight => Instruction::shift_arithmetic_right_64(dst, src1, src2),
654 Op::RotateLeft => Instruction::rotate_left_64(dst, src1, src2),
655 Op::RotateRight => Instruction::rotate_right_64(dst, src1, src2),
656 Op::AndInverted => Instruction::and_inverted(dst, src1, src2),
657 Op::OrInverted => Instruction::or_inverted(dst, src1, src2),
658 });
659 }
660 }
661 } else if let Some(src2) = parse_immediate(src2).and_then(|value| value.try_into().ok()) {
662 let dst = dst.into();
663 let src1 = src1.into();
664 match op_marker {
665 OpMarker::I32 => {
666 emit_and_continue!(match op {
667 Op::Add => Instruction::add_imm_32(dst, src1, src2),
668 Op::Sub => Instruction::add_imm_32(dst, src1, (-(src2 as i32)) as u32),
669 Op::And => {
670 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
671 }
672 Op::Xor => {
673 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
674 }
675 Op::Or => {
676 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
677 }
678 Op::Mul => Instruction::mul_imm_32(dst, src1, src2),
679 Op::DivUnsigned | Op::DivSigned => {
680 return Err(format!(
681 "cannot parse line {nth_line}: i32 and division is not supported for immediates"
682 ));
683 }
684 Op::RemUnsigned | Op::RemSigned => {
685 return Err(format!(
686 "cannot parse line {nth_line}: i32 and modulo is not supported for immediates"
687 ));
688 }
689 Op::LessUnsigned => {
690 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
691 }
692 Op::LessSigned => {
693 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
694 }
695 Op::GreaterUnsigned => {
696 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
697 }
698 Op::GreaterSigned => {
699 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
700 }
701 Op::ShiftLeft => Instruction::shift_logical_left_imm_32(dst, src1, src2),
702 Op::ShiftRight => Instruction::shift_logical_right_imm_32(dst, src1, src2),
703 Op::ShiftArithmeticRight => Instruction::shift_arithmetic_right_imm_32(dst, src1, src2),
704 Op::RotateLeft => {
705 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
706 }
707 Op::RotateRight => Instruction::rotate_right_imm_32(dst, src1, src2),
708 Op::AndInverted => {
709 return Err(format!(
710 "cannot parse line {nth_line}: i32 and and_inverted not supported for immediates"
711 ));
712 }
713 Op::OrInverted => {
714 return Err(format!(
715 "cannot parse line {nth_line}: i32 and or_inverted not supported for immediates"
716 ));
717 }
718 });
719 }
720 OpMarker::NONE => {
721 emit_and_continue!(match op {
722 Op::Add => Instruction::add_imm_64(dst, src1, src2),
723 Op::Sub => Instruction::add_imm_64(dst, src1, (-(src2 as i32)) as u32),
724 Op::And => Instruction::and_imm(dst, src1, src2),
725 Op::Xor => Instruction::xor_imm(dst, src1, src2),
726 Op::Or => Instruction::or_imm(dst, src1, src2),
727 Op::Mul => Instruction::mul_imm_64(dst, src1, src2),
728 Op::DivUnsigned | Op::DivSigned => {
729 return Err(format!("cannot parse line {nth_line}: division is not supported for immediates"));
730 }
731 Op::RemUnsigned | Op::RemSigned => {
732 return Err(format!("cannot parse line {nth_line}: modulo is not supported for immediates"));
733 }
734 Op::LessUnsigned => Instruction::set_less_than_unsigned_imm(dst, src1, src2),
735 Op::LessSigned => Instruction::set_less_than_signed_imm(dst, src1, src2),
736 Op::GreaterUnsigned => Instruction::set_greater_than_unsigned_imm(dst, src1, src2),
737 Op::GreaterSigned => Instruction::set_greater_than_signed_imm(dst, src1, src2),
738 Op::ShiftLeft => Instruction::shift_logical_left_imm_64(dst, src1, src2),
739 Op::ShiftRight => Instruction::shift_logical_right_imm_64(dst, src1, src2),
740 Op::ShiftArithmeticRight => Instruction::shift_arithmetic_right_imm_64(dst, src1, src2),
741 Op::RotateLeft => {
742 return Err(format!("cannot parse line {nth_line}: rotate_left not supported for immediates"));
743 }
744 Op::RotateRight => Instruction::rotate_right_imm_64(dst, src1, src2),
745 Op::AndInverted => {
746 return Err(format!("cannot parse line {nth_line}: and_inverted not supported for immediates"));
747 }
748 Op::OrInverted => {
749 return Err(format!("cannot parse line {nth_line}: or_inverted not supported for immediates"));
750 }
751 });
752 }
753 }
754 }
755 } else if let Some(src1) = parse_immediate(src1).and_then(|value| value.try_into().ok()) {
756 if let Some(src2) = parse_reg(src2) {
757 let dst = dst.into();
758 let src2 = src2.into();
759 match op_marker {
760 OpMarker::I32 => {
761 emit_and_continue!(match op {
762 Op::Add => Instruction::add_imm_32(dst, src2, src1),
763 Op::Sub => Instruction::negate_and_add_imm_32(dst, src2, src1),
764 Op::And => {
765 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
766 }
767 Op::Xor => {
768 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
769 }
770 Op::Or => {
771 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
772 }
773 Op::Mul => Instruction::mul_imm_32(dst, src2, src1),
774 Op::DivUnsigned | Op::DivSigned => {
775 return Err(format!(
776 "cannot parse line {nth_line}: i32 and division is not supported for immediates"
777 ));
778 }
779 Op::RemUnsigned | Op::RemSigned => {
780 return Err(format!(
781 "cannot parse line {nth_line}: i32 and modulo is not supported for immediates"
782 ));
783 }
784 Op::LessUnsigned => {
785 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
786 }
787 Op::LessSigned => {
788 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
789 }
790 Op::GreaterUnsigned => {
791 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
792 }
793 Op::GreaterSigned => {
794 return Err(format!("cannot parse line {nth_line}: i32 not supported for operation"));
795 }
796 Op::ShiftLeft => Instruction::shift_logical_left_imm_alt_32(dst, src2, src1),
797 Op::ShiftRight => Instruction::shift_logical_right_imm_alt_32(dst, src2, src1),
798 Op::ShiftArithmeticRight => Instruction::shift_arithmetic_right_imm_alt_32(dst, src2, src1),
799 Op::RotateLeft => {
800 return Err(format!(
801 "cannot parse line {nth_line}: i32 and rotate_left is not supported for immediates"
802 ));
803 }
804 Op::RotateRight => Instruction::rotate_right_imm_alt_32(dst, src2, src1),
805 Op::AndInverted => {
806 return Err(format!(
807 "cannot parse line {nth_line}: i32 and and_inverted not supported for operation"
808 ));
809 }
810 Op::OrInverted => {
811 return Err(format!(
812 "cannot parse line {nth_line}: i32 and or_inverted not supported for operation"
813 ));
814 }
815 });
816 }
817 OpMarker::NONE => {
818 emit_and_continue!(match op {
819 Op::Add => Instruction::add_imm_64(dst, src2, src1),
820 Op::Sub => Instruction::negate_and_add_imm_64(dst, src2, src1),
821 Op::And => Instruction::and_imm(dst, src2, src1),
822 Op::Xor => Instruction::xor_imm(dst, src2, src1),
823 Op::Or => Instruction::or_imm(dst, src2, src1),
824 Op::Mul => Instruction::mul_imm_64(dst, src2, src1),
825 Op::DivUnsigned | Op::DivSigned => {
826 return Err(format!("cannot parse line {nth_line}: division is not supported for immediates"));
827 }
828 Op::RemUnsigned | Op::RemSigned => {
829 return Err(format!("cannot parse line {nth_line}: modulo is not supported for immediates"));
830 }
831 Op::LessUnsigned => Instruction::set_greater_than_unsigned_imm(dst, src2, src1),
832 Op::LessSigned => Instruction::set_greater_than_signed_imm(dst, src2, src1),
833 Op::GreaterUnsigned => Instruction::set_less_than_unsigned_imm(dst, src2, src1),
834 Op::GreaterSigned => Instruction::set_less_than_signed_imm(dst, src2, src1),
835 Op::ShiftLeft => Instruction::shift_logical_left_imm_alt_64(dst, src2, src1),
836 Op::ShiftRight => Instruction::shift_logical_right_imm_alt_64(dst, src2, src1),
837 Op::ShiftArithmeticRight => Instruction::shift_arithmetic_right_imm_alt_64(dst, src2, src1),
838 Op::RotateLeft => {
839 return Err(format!("cannot parse line {nth_line}: i64 not supported for operation"));
840 }
841 Op::RotateRight => Instruction::rotate_right_imm_alt_64(dst, src2, src1),
842 Op::AndInverted => {
843 return Err(format!("cannot parse line {nth_line}: and_inverted not supported for immediates"));
844 }
845 Op::OrInverted => {
846 return Err(format!("cannot parse line {nth_line}: or_inverted not supported for immediates"));
847 }
848 });
849 }
850 }
851 }
852 }
853 }
854
855 if let Some(rhs) = rhs.strip_suffix(')') {
856 let rhs = rhs.trim();
857 if let Some((name, rhs)) = split(rhs, "(") {
858 type F = fn(RawReg, RawReg, RawReg) -> Instruction;
859 let ctor = match name {
860 "maxs" => Some(Instruction::maximum as F),
861 "maxu" => Some(Instruction::maximum_unsigned as F),
862 "mins" => Some(Instruction::minimum as F),
863 "minu" => Some(Instruction::minimum_unsigned as F),
864 _ => None,
865 };
866
867 if let Some(ctor) = ctor {
868 if let Some((src1, src2)) = split(rhs, ",") {
869 if let Some(src1) = parse_reg(src1) {
870 if let Some(src2) = parse_reg(src2) {
871 emit_and_continue!(ctor(dst.into(), src1.into(), src2.into()));
872 }
873 }
874 }
875 }
876 }
877 }
878
879 #[allow(clippy::manual_map)]
880 let load_kind = if let Some(rhs) = rhs.strip_prefix("u8") {
881 Some((LoadKind::U8, rhs))
882 } else if let Some(rhs) = rhs.strip_prefix("u16") {
883 Some((LoadKind::U16, rhs))
884 } else if let Some(rhs) = rhs.strip_prefix("u32") {
885 Some((LoadKind::U32, rhs))
886 } else if let Some(rhs) = rhs.strip_prefix("u64") {
887 Some((LoadKind::U64, rhs))
888 } else if let Some(rhs) = rhs.strip_prefix("i8") {
889 Some((LoadKind::I8, rhs))
890 } else if let Some(rhs) = rhs.strip_prefix("i16") {
891 Some((LoadKind::I16, rhs))
892 } else if let Some(rhs) = rhs.strip_prefix("i32") {
893 Some((LoadKind::I32, rhs))
894 } else {
895 None
896 };
897
898 if let Some((kind, rhs)) = load_kind {
899 if let Some((base, offset)) = parse_indirect_memory_access(rhs) {
900 let dst = dst.into();
901 let base = base.into();
902 let offset = offset as u32;
903 emit_and_continue!(match kind {
904 LoadKind::I8 => Instruction::load_indirect_i8(dst, base, offset),
905 LoadKind::I16 => Instruction::load_indirect_i16(dst, base, offset),
906 LoadKind::I32 => Instruction::load_indirect_i32(dst, base, offset),
907 LoadKind::U8 => Instruction::load_indirect_u8(dst, base, offset),
908 LoadKind::U16 => Instruction::load_indirect_u16(dst, base, offset),
909 LoadKind::U32 => Instruction::load_indirect_u32(dst, base, offset),
910 LoadKind::U64 => Instruction::load_indirect_u64(dst, base, offset),
911 });
912 } else if let Some(offset) = parse_absolute_memory_access(rhs) {
913 let dst = dst.into();
914 let offset = offset as u32;
915 emit_and_continue!(match kind {
916 LoadKind::I8 => Instruction::load_i8(dst, offset),
917 LoadKind::I16 => Instruction::load_i16(dst, offset),
918 LoadKind::I32 => Instruction::load_i32(dst, offset),
919 LoadKind::U8 => Instruction::load_u8(dst, offset),
920 LoadKind::U16 => Instruction::load_u16(dst, offset),
921 LoadKind::U32 => Instruction::load_u32(dst, offset),
922 LoadKind::U64 => Instruction::load_u64(dst, offset),
923 });
924 }
925 }
926 }
927
928 #[allow(clippy::manual_map)]
929 let store_kind = if let Some(lhs) = lhs.strip_prefix("u8") {
930 Some((StoreKind::U8, lhs))
931 } else if let Some(lhs) = lhs.strip_prefix("u16") {
932 Some((StoreKind::U16, lhs))
933 } else if let Some(lhs) = lhs.strip_prefix("u32") {
934 Some((StoreKind::U32, lhs))
935 } else if let Some(lhs) = lhs.strip_prefix("u64") {
936 Some((StoreKind::U64, lhs))
937 } else {
938 None
939 };
940
941 if let Some((kind, lhs)) = store_kind {
942 if let Some(offset) = parse_absolute_memory_access(lhs) {
943 let offset = offset as u32;
944 if let Some(rhs) = parse_reg(rhs) {
945 let rhs = rhs.into();
946 emit_and_continue!(match kind {
947 StoreKind::U8 => Instruction::store_u8(rhs, offset),
948 StoreKind::U16 => Instruction::store_u16(rhs, offset),
949 StoreKind::U32 => Instruction::store_u32(rhs, offset),
950 StoreKind::U64 => Instruction::store_u64(rhs, offset),
951 });
952 } else if let Some(rhs) = parse_immediate(rhs).and_then(|value| value.try_into().ok()) {
953 emit_and_continue!(match kind {
954 StoreKind::U8 => match u8::try_from(rhs) {
955 Ok(_) => Instruction::store_imm_u8(offset, rhs),
956 Err(_) => return Err(format!("cannot parse line {nth_line}: immediate larger than u8")),
957 },
958 StoreKind::U16 => match u16::try_from(rhs) {
959 Ok(_) => Instruction::store_imm_u16(offset, rhs),
960 Err(_) => return Err(format!("cannot parse line {nth_line}: immediate larger than u16")),
961 },
962 StoreKind::U32 => Instruction::store_imm_u32(offset, rhs),
963 StoreKind::U64 => Instruction::store_imm_u64(offset, rhs),
964 });
965 }
966 } else if let Some((base, offset)) = parse_indirect_memory_access(lhs) {
967 let base = base.into();
968 let offset = offset as u32;
969 if let Some(rhs) = parse_reg(rhs) {
970 let rhs = rhs.into();
971 emit_and_continue!(match kind {
972 StoreKind::U8 => Instruction::store_indirect_u8(rhs, base, offset),
973 StoreKind::U16 => Instruction::store_indirect_u16(rhs, base, offset),
974 StoreKind::U32 => Instruction::store_indirect_u32(rhs, base, offset),
975 StoreKind::U64 => Instruction::store_indirect_u64(rhs, base, offset),
976 });
977 } else if let Some(rhs) = parse_immediate(rhs).and_then(|value| value.try_into().ok()) {
978 emit_and_continue!(match kind {
979 StoreKind::U8 => match u8::try_from(rhs) {
980 Ok(_) => Instruction::store_imm_indirect_u8(base, offset, rhs),
981 Err(_) => return Err(format!("cannot parse line {nth_line}: immediate larger than u8")),
982 },
983 StoreKind::U16 => match u16::try_from(rhs) {
984 Ok(_) => Instruction::store_imm_indirect_u16(base, offset, rhs),
985 Err(_) => return Err(format!("cannot parse line {nth_line}: immediate larger than u16")),
986 },
987 StoreKind::U32 => Instruction::store_imm_indirect_u32(base, offset, rhs),
988 StoreKind::U64 => Instruction::store_imm_indirect_u64(base, offset, rhs),
989 });
990 }
991 }
992 }
993 }
994
995 return Err(format!("cannot parse line {nth_line}: \"{original_line}\""));
996 }
997
998 let mut code = Vec::new();
999 let mut jump_table = Vec::new();
1000 for instruction in instructions {
1001 match instruction {
1002 MaybeInstruction::Instruction(instruction) => {
1003 code.push(instruction);
1004 }
1005 MaybeInstruction::LoadLabelAddress(dst, label) => {
1006 let Some(&target_index) = label_to_index.get(&*label) else {
1007 return Err(format!("label is not defined: \"{label}\""));
1008 };
1009
1010 jump_table.push(target_index);
1011 code.push(Instruction::load_imm(
1012 dst.into(),
1013 (jump_table.len() as u32) * crate::abi::VM_CODE_ADDRESS_ALIGNMENT,
1014 ));
1015 }
1016 MaybeInstruction::LoadImmAndJump(dst, value, label) => {
1017 let Some(&target_index) = label_to_index.get(&*label) else {
1018 return Err(format!("label is not defined: \"{label}\""));
1019 };
1020
1021 code.push(Instruction::load_imm_and_jump(dst.into(), value, target_index));
1022 }
1023 MaybeInstruction::Jump(label) => {
1024 let Some(&target_index) = label_to_index.get(&*label) else {
1025 return Err(format!("label is not defined: \"{label}\""));
1026 };
1027 code.push(Instruction::jump(target_index));
1028 }
1029 MaybeInstruction::Branch(label, kind, lhs, rhs) => {
1030 let Some(&target_index) = label_to_index.get(&*label) else {
1031 return Err(format!("label is not defined: \"{label}\""));
1032 };
1033
1034 let lhs = lhs.into();
1035 let rhs = rhs.into();
1036 let instruction = match kind {
1037 ConditionKind::Eq => Instruction::branch_eq(lhs, rhs, target_index),
1038 ConditionKind::NotEq => Instruction::branch_not_eq(lhs, rhs, target_index),
1039 ConditionKind::LessSigned => Instruction::branch_less_signed(lhs, rhs, target_index),
1040 ConditionKind::LessUnsigned => Instruction::branch_less_unsigned(lhs, rhs, target_index),
1041 ConditionKind::GreaterOrEqualSigned => Instruction::branch_greater_or_equal_signed(lhs, rhs, target_index),
1042 ConditionKind::GreaterOrEqualUnsigned => Instruction::branch_greater_or_equal_unsigned(lhs, rhs, target_index),
1043
1044 ConditionKind::LessOrEqualSigned => Instruction::branch_greater_or_equal_signed(rhs, lhs, target_index),
1045 ConditionKind::LessOrEqualUnsigned => Instruction::branch_greater_or_equal_unsigned(rhs, lhs, target_index),
1046 ConditionKind::GreaterSigned => Instruction::branch_less_signed(rhs, lhs, target_index),
1047 ConditionKind::GreaterUnsigned => Instruction::branch_less_unsigned(rhs, lhs, target_index),
1048 };
1049 code.push(instruction);
1050 }
1051 MaybeInstruction::BranchImm(label, kind, lhs, rhs) => {
1052 let Some(&target_index) = label_to_index.get(&*label) else {
1053 return Err(format!("label is not defined: \"{label}\""));
1054 };
1055
1056 let lhs = lhs.into();
1057 let rhs = rhs as u32;
1058 let instruction = match kind {
1059 ConditionKind::Eq => Instruction::branch_eq_imm(lhs, rhs, target_index),
1060 ConditionKind::NotEq => Instruction::branch_not_eq_imm(lhs, rhs, target_index),
1061 ConditionKind::LessSigned => Instruction::branch_less_signed_imm(lhs, rhs, target_index),
1062 ConditionKind::LessUnsigned => Instruction::branch_less_unsigned_imm(lhs, rhs, target_index),
1063 ConditionKind::GreaterOrEqualSigned => Instruction::branch_greater_or_equal_signed_imm(lhs, rhs, target_index),
1064 ConditionKind::GreaterOrEqualUnsigned => Instruction::branch_greater_or_equal_unsigned_imm(lhs, rhs, target_index),
1065 ConditionKind::LessOrEqualSigned => Instruction::branch_less_or_equal_signed_imm(lhs, rhs, target_index),
1066 ConditionKind::LessOrEqualUnsigned => Instruction::branch_less_or_equal_unsigned_imm(lhs, rhs, target_index),
1067 ConditionKind::GreaterSigned => Instruction::branch_greater_signed_imm(lhs, rhs, target_index),
1068 ConditionKind::GreaterUnsigned => Instruction::branch_greater_unsigned_imm(lhs, rhs, target_index),
1069 };
1070 code.push(instruction);
1071 }
1072 };
1073 }
1074
1075 let Some(isa) = isa else {
1076 return Err("no ISA was declared in the program".into());
1077 };
1078
1079 let mut builder = crate::writer::ProgramBlobBuilder::new(isa);
1080 builder.set_ro_data(ro_data);
1081 builder.set_ro_data_size(ro_data_size);
1082 builder.set_rw_data(rw_data);
1083 builder.set_rw_data_size(rw_data_size);
1084 builder.set_stack_size(stack_size);
1085 builder.set_code(&code, &jump_table);
1086 for (label, export) in exports {
1087 match export {
1088 Export::ByBlock(target_index) => builder.add_export_by_basic_block(target_index, label.as_bytes()),
1089 Export::ByInstruction(target_index) => builder.add_export_by_instruction(target_index, label.as_bytes()),
1090 }
1091 }
1092
1093 builder.to_vec()
1094}
1095
/// Test helper: assembles `input`, decodes the produced blob again, and asserts that the
/// textual listing of the decoded instructions equals `expected_output`.
///
/// `expected_output` is normalized before the comparison: the whole string is trimmed and
/// every line is trimmed individually, so callers may indent the expectation freely.
///
/// # Panics
///
/// Panics if assembling fails, if the blob cannot be parsed back, or if the listing
/// differs from the normalized expectation.
#[cfg(test)]
#[track_caller]
fn assert_assembler(input: &str, expected_output: &str) {
    use crate::program::InstructionFormat;
    use alloc::string::ToString;

    // Strip surrounding whitespace from the expectation as a whole and from each line.
    let expected = expected_output
        .trim()
        .split('\n')
        .map(str::trim)
        .collect::<Vec<_>>()
        .join("\n");

    let blob = assemble(Some(InstructionSetKind::Latest64), input).expect("failed to assemble");
    let program = crate::program::ProgramBlob::parse(blob.into()).unwrap();

    let code_length = program.code().len();
    let format = InstructionFormat::default();

    // Render instructions one per line, stopping at the first one whose offset is no
    // longer inside the code section.
    let mut listing = Vec::new();
    for inst in program.instructions() {
        if (inst.offset.0 as usize) >= code_length {
            break;
        }
        listing.push(inst.kind.display(&format).to_string());
    }

    let actual = listing.join("\n");
    assert_eq!(actual, expected);
}
1115
/// Round-trips a couple of small programs through the assembler and checks the
/// resulting instruction listing.
#[test]
fn test_assembler_basics() {
    // Each entry pairs an assembly source with the listing expected after assembling
    // and decoding it again.
    let cases = [
        // Plain register arithmetic surrounded by comment lines; the comments do not
        // appear in the expected listing.
        (
            "
            // This is a comment.
            a0 = a1 + a2
            a3 = a4 + a5
            // This is another comment.
            ",
            "
            a0 = a1 + a2
            a3 = a4 + a5
            ",
        ),
        // A forward jump to a label; the expected listing shows a numeric jump target
        // and a `fallthrough` entry ahead of the labelled instruction.
        (
            "
            jump @label
            a0 = 1
            @label:
            a0 = 2
            ",
            "
            jump 6
            a0 = 0x1
            fallthrough
            a0 = 0x2
            ",
        ),
    ];

    for (source, listing) in cases {
        assert_assembler(source, listing);
    }
}