mclangc/src/compile/linux_x86_64.rs
MCorange 7e46c07cca Finalising functions
typechecking still buggy, but close enough cause i cant handle any more
typechecker dev

added:
 - Functions
 - Constants
 - Better file positions for error reporting
 - Better error reporting, with examples being drawn in term

-MC
2023-04-13 00:34:08 +03:00

590 lines
24 KiB
Rust

use std::{fs, path::PathBuf, io::{Write, BufWriter}, collections::HashMap};
use crate::{constants::{Operator, OpType, KeywordType}, Args};
use color_eyre::Result;
use crate::compile::commands::linux_x86_64_compile_and_link;
use crate::constants::InstructionType;
use super::{commands::linux_x86_64_run, Constant, Memory, Function};
use eyre::eyre;
pub fn compile(tokens: &[Operator], args: &Args) -> Result<i32>{
let debug = args.get_opt_level()? < 1;
let mut of_c = PathBuf::from(&args.out_file);
let (mut of_o, mut of_a) = if args.out_file == *crate::DEFAULT_OUT_FILE {
let of_o = PathBuf::from("/tmp/mclang_comp.o");
let of_a = PathBuf::from("/tmp/mclang_comp.nasm");
(of_o, of_a)
} else {
let of_o = PathBuf::from(&args.out_file);
let of_a = PathBuf::from(&args.out_file);
(of_o, of_a)
};
of_c.set_extension("");
of_o.set_extension("o");
of_a.set_extension("nasm");
let file = fs::File::create(&of_a)?;
let mut writer = BufWriter::new(&file);
let mut memories: Vec<Memory> = Vec::new();
let mut constants: HashMap<String, Constant> = HashMap::new();
let mut functions: Vec<Function> = Vec::new();
// println!("{}", tokens.len());
let mut strings: Vec<String> = Vec::new();
writeln!(writer, "BITS 64")?;
writeln!(writer, "segment .text")?;
writeln!(writer, "_dbg_print:")?;
writeln!(writer, " mov r9, -3689348814741910323")?;
writeln!(writer, " sub rsp, 40")?;
writeln!(writer, " mov BYTE [rsp+31], 10")?;
writeln!(writer, " lea rcx, [rsp+30]")?;
writeln!(writer, ".L2:")?;
writeln!(writer, " mov rax, rdi")?;
writeln!(writer, " lea r8, [rsp+32]")?;
writeln!(writer, " mul r9")?;
writeln!(writer, " mov rax, rdi")?;
writeln!(writer, " sub r8, rcx")?;
writeln!(writer, " shr rdx, 3")?;
writeln!(writer, " lea rsi, [rdx+rdx*4]")?;
writeln!(writer, " add rsi, rsi")?;
writeln!(writer, " sub rax, rsi")?;
writeln!(writer, " add eax, 48")?;
writeln!(writer, " mov BYTE [rcx], al")?;
writeln!(writer, " mov rax, rdi")?;
writeln!(writer, " mov rdi, rdx")?;
writeln!(writer, " mov rdx, rcx")?;
writeln!(writer, " sub rcx, 1")?;
writeln!(writer, " cmp rax, 9")?;
writeln!(writer, " ja .L2")?;
writeln!(writer, " lea rax, [rsp+32]")?;
writeln!(writer, " mov edi, 1")?;
writeln!(writer, " sub rdx, rax")?;
writeln!(writer, " xor eax, eax")?;
writeln!(writer, " lea rsi, [rsp+32+rdx]")?;
writeln!(writer, " mov rdx, r8")?;
writeln!(writer, " mov rax, 1")?;
writeln!(writer, " syscall")?;
writeln!(writer, " add rsp, 40")?;
writeln!(writer, " ret")?;
writeln!(writer, "global _start")?;
writeln!(writer, "_start:")?;
writeln!(writer, " lea rbp, [rel ret_stack]")?;
writeln!(writer, " call main")?;
writeln!(writer, " jmp end")?;
let mut ti = 0;
while ti < tokens.len() {
let token = &tokens[ti];
// println!("{:?}", token);
if debug {
writeln!(writer, "addr_{ti}:")?;
if token.typ == OpType::Instruction(InstructionType::PushInt) {
writeln!(writer, " ;; -- {:?} {}", token.typ, token.value)?;
} else if token.typ == OpType::Instruction(InstructionType::PushStr) {
writeln!(writer, " ;; -- {:?} {}", token.typ, token.text.escape_debug())?;
} else {
writeln!(writer, " ;; -- {:?}", token.typ)?;
}
} else {
if ti != 0 && tokens[ti-1].typ == OpType::Keyword(KeywordType::Else) ||
tokens[ti-1].typ == OpType::Keyword(KeywordType::End){
writeln!(writer, "addr_{ti}:")?;
}
if ti + 1 < tokens.len() && tokens[ti+1].typ == OpType::Keyword(KeywordType::End) {
writeln!(writer, "addr_{ti}:")?;
}
if let OpType::Keyword(keyword) = &token.typ {
match keyword {
&KeywordType::End |
&KeywordType::While => {
writeln!(writer, "addr_{ti}:")?;
}
_ => ()
}
}
}
match token.typ.clone() {
// stack
OpType::Instruction(instruction) => {
match instruction {
InstructionType::PushInt => {
writeln!(writer, " mov rax, {}", token.value)?;
writeln!(writer, " push rax")?;
ti += 1;
},
InstructionType::PushStr => {
writeln!(writer, " mov rax, {}", token.text.len())?;
writeln!(writer, " push rax")?;
writeln!(writer, " mov rax, str_{}", strings.len())?;
writeln!(writer, " push rax")?;
strings.push(token.text.clone());
ti += 1;
}
InstructionType::Drop => {
writeln!(writer, " pop rax")?;
ti += 1;
},
InstructionType::Print => {
writeln!(writer, " pop rdi")?;
writeln!(writer, " call _dbg_print")?;
ti += 1;
},
InstructionType::Dup => {
writeln!(writer, " pop rax")?;
writeln!(writer, " push rax")?;
writeln!(writer, " push rax")?;
ti += 1;
},
InstructionType::Rot => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " pop rcx")?;
writeln!(writer, " push rbx")?;
writeln!(writer, " push rax")?;
writeln!(writer, " push rcx")?;
ti += 1;
},
InstructionType::Swap => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " push rax")?;
writeln!(writer, " push rbx")?;
ti += 1;
},
InstructionType::Over => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " push rbx")?;
writeln!(writer, " push rax")?;
writeln!(writer, " push rbx")?;
ti += 1;
},
InstructionType::Load8 => {
writeln!(writer, " pop rax")?;
writeln!(writer, " xor rbx, rbx")?;
writeln!(writer, " mov bl, byte [rax]")?;
writeln!(writer, " push rbx")?;
ti += 1;
}
InstructionType::Store8 => {
writeln!(writer, " pop rbx")?;
writeln!(writer, " pop rax")?;
writeln!(writer, " mov byte [rax], bl")?;
ti += 1;
}
InstructionType::Load32 => {
writeln!(writer, " pop rax")?;
writeln!(writer, " xor rbx, rbx")?;
writeln!(writer, " mov bl, dword [rax]")?;
writeln!(writer, " push rbx")?;
ti += 1;
}
InstructionType::Store32 => {
writeln!(writer, " pop rbx")?;
writeln!(writer, " pop rax")?;
writeln!(writer, " mov dword[rax], bl")?;
ti += 1;
}
InstructionType::Load64 => {
writeln!(writer, " pop rax")?;
writeln!(writer, " xor rbx, rbx")?;
writeln!(writer, " mov bl, qword [rax]")?;
writeln!(writer, " push rbx")?;
ti += 1;
}
InstructionType::Store64 => {
writeln!(writer, " pop rbx")?;
writeln!(writer, " pop rax")?;
writeln!(writer, " mov qword [rax], bl")?;
ti += 1;
}
// math
InstructionType::Plus => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " add rax, rbx")?;
writeln!(writer, " push rax")?;
ti += 1;
},
InstructionType::Minus => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " sub rbx, rax")?;
writeln!(writer, " push rbx")?;
ti += 1;
},
InstructionType::Equals => {
writeln!(writer, " mov rcx, 0")?;
writeln!(writer, " mov rdx, 1")?;
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " cmp rax, rbx")?;
writeln!(writer, " cmove rcx, rdx")?;
writeln!(writer, " push rcx")?;
ti += 1;
},
InstructionType::Lt => {
writeln!(writer, " mov rcx, 0")?;
writeln!(writer, " mov rdx, 1")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " pop rax")?;
writeln!(writer, " cmp rax, rbx")?;
writeln!(writer, " cmovl rcx, rdx")?;
writeln!(writer, " push rcx")?;
ti += 1;
},
InstructionType::Gt => {
writeln!(writer, " mov rcx, 0")?;
writeln!(writer, " mov rdx, 1")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " pop rax")?;
writeln!(writer, " cmp rax, rbx")?;
writeln!(writer, " cmovg rcx, rdx")?;
writeln!(writer, " push rcx")?;
ti += 1;
},
InstructionType::NotEquals => {
writeln!(writer, " mov rcx, 1")?;
writeln!(writer, " mov rdx, 0")?;
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " cmp rax, rbx")?;
writeln!(writer, " cmove rcx, rdx")?;
writeln!(writer, " push rcx")?;
ti += 1;
},
InstructionType::Le => {
writeln!(writer, " mov rcx, 0")?;
writeln!(writer, " mov rdx, 1")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " pop rax")?;
writeln!(writer, " cmp rax, rbx")?;
writeln!(writer, " cmovle rcx, rdx")?;
writeln!(writer, " push rcx")?;
ti += 1;
},
InstructionType::Ge => {
writeln!(writer, " mov rcx, 0")?;
writeln!(writer, " mov rdx, 1")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " pop rax")?;
writeln!(writer, " cmp rax, rbx")?;
writeln!(writer, " cmovge rcx, rdx")?;
writeln!(writer, " push rcx")?;
ti += 1;
},
InstructionType::Band => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " and rbx, rax")?;
writeln!(writer, " push rbx")?;
ti += 1;
},
InstructionType::Bor => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " or rbx, rax")?;
writeln!(writer, " push rbx")?;
ti += 1;
},
InstructionType::Shr => {
writeln!(writer, " pop rcx")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " shr rbx, cl")?;
writeln!(writer, " push rbx")?;
ti += 1;
},
InstructionType::Shl => {
writeln!(writer, " pop rcx")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " shl rbx, cl")?;
writeln!(writer, " push rbx")?;
ti += 1;
},
InstructionType::DivMod => {
writeln!(writer, " xor rdx, rdx")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " pop rax")?;
writeln!(writer, " div rbx")?;
writeln!(writer, " push rax")?;
writeln!(writer, " push rdx")?;
ti += 1;
},
InstructionType::Mul => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " mul rbx")?;
writeln!(writer, " push rax")?;
ti += 1;
},
InstructionType::Syscall0 => {
writeln!(writer, " pop rax")?;
writeln!(writer, " syscall")?;
writeln!(writer, " push rax")?;
ti += 1;
},
InstructionType::Syscall1 => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rdi")?;
writeln!(writer, " syscall")?;
writeln!(writer, " push rax")?;
ti += 1;
},
InstructionType::Syscall2 => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rdi")?;
writeln!(writer, " pop rsi")?;
writeln!(writer, " syscall")?;
writeln!(writer, " push rax")?;
ti += 1;
},
InstructionType::Syscall3 => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rdi")?;
writeln!(writer, " pop rsi")?;
writeln!(writer, " pop rdx")?;
writeln!(writer, " syscall")?;
writeln!(writer, " push rax")?;
ti += 1;
},
InstructionType::Syscall4 => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rdi")?;
writeln!(writer, " pop rsi")?;
writeln!(writer, " pop rdx")?;
writeln!(writer, " pop r10")?;
writeln!(writer, " syscall")?;
writeln!(writer, " push rax")?;
ti += 1;
},
InstructionType::Syscall5 => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rdi")?;
writeln!(writer, " pop rsi")?;
writeln!(writer, " pop rdx")?;
writeln!(writer, " pop r10")?;
writeln!(writer, " pop r8")?;
writeln!(writer, " syscall")?;
writeln!(writer, " push rax")?;
ti += 1;
},
InstructionType::Syscall6 => {
writeln!(writer, " pop rax")?;
writeln!(writer, " pop rdi")?;
writeln!(writer, " pop rsi")?;
writeln!(writer, " pop rdx")?;
writeln!(writer, " pop r10")?;
writeln!(writer, " pop r8")?;
writeln!(writer, " pop r9")?;
writeln!(writer, " syscall")?;
writeln!(writer, " push rax")?;
ti += 1;
},
InstructionType::MemUse => {
writeln!(writer, " push mem_{}", token.addr.unwrap())?;
ti += 1;
},
InstructionType::None => {
println!("{token:?}");
unreachable!()
},
InstructionType::FnCall => {
writeln!(writer, " call {}", token.text)?;
ti += 1;
},
InstructionType::Return => {
writeln!(writer, " sub rbp, 8")?;
writeln!(writer, " mov rbx, qword [rbp]")?;
writeln!(writer, " push rbx")?;
writeln!(writer, " ret")?;
ti += 1;
},
InstructionType::CastBool |
InstructionType::CastPtr |
InstructionType::CastInt |
InstructionType::CastVoid |
InstructionType::TypeBool |
InstructionType::TypePtr |
InstructionType::TypeInt |
InstructionType::TypeVoid |
InstructionType::TypeStr |
InstructionType::TypeAny |
InstructionType::Returns |
InstructionType::With => {
ti += 1;
}
InstructionType::ConstUse => {
writeln!(writer, " mov rax, qword [const_{}]", token.text)?;
writeln!(writer, " push rax")?;
let mut c = constants.get(&token.text).unwrap().clone();
c.used = true;
constants.remove(&token.text);
constants.insert(token.text.clone(), c);
ti += 1;
},
}
}
OpType::Keyword(keyword) => {
match keyword {
// block
KeywordType::If |
KeywordType::Do => {
writeln!(writer, " pop rax")?;
writeln!(writer, " test rax, rax")?;
writeln!(writer, " jz addr_{}", token.jmp)?;
ti += 1;
}
KeywordType::Else => {
writeln!(writer, " jmp addr_{}", token.jmp)?;
ti += 1;
},
KeywordType::While => {
ti += 1;
}
KeywordType::End => {
if ti + 1 != token.jmp {
// writeln!(writer, " jmp addr_{}", token.jmp)?;
}
ti += 1;
},
KeywordType::Memory => {
memories.push(Memory { size: token.value, loc: token.loc.clone(), id: token.addr.unwrap() });
ti += 1;
}
KeywordType::ConstantDef => {
// TODO: after we add c style strings add supoort for them in constants
let a = args.get_opt_level()? < 1;
let c = Constant{
loc: token.loc.clone(),
name: token.text.clone(),
value_i: Some(token.value),
value_s: None,
used: a,
};
constants.insert(token.text.clone(), c);
ti += 1;
},
KeywordType::FunctionDef => {
writeln!(writer, "{}:", token.text)?;
writeln!(writer, " pop rbx")?;
writeln!(writer, " mov qword [rbp], rbx")?;
writeln!(writer, " add rbp, 8")?;
functions.push(Function { loc: token.loc.clone(), name: token.text.clone() });
ti += 1;
},
KeywordType::FunctionDone => {
writeln!(writer, " sub rbp, 8")?;
writeln!(writer, " mov rbx, qword [rbp]")?;
writeln!(writer, " push rbx")?;
writeln!(writer, " ret")?;
ti += 1;
}
KeywordType::FunctionThen => ti += 1,
KeywordType::Function |
KeywordType::Include |
KeywordType::Constant => unreachable!(),
}
}
}
}
writeln!(writer, "addr_{ti}:")?;
writeln!(writer, "end:")?;
writeln!(writer, " mov rax, 60")?;
writeln!(writer, " mov rdi, 0")?;
writeln!(writer, " syscall")?;
writeln!(writer, "segment .data")?;
for (i, s) in strings.iter().enumerate() {
let s_chars = s.chars().map(|c| (c as u32).to_string()).collect::<Vec<String>>();
let s_list = s_chars.join(",");
writeln!(writer, " str_{}: db {} ; {}", i, s_list, s.escape_default())?;
}
for (_, c) in constants {
if !c.used {
continue;
}
if let Some(v) = &c.value_i {
writeln!(writer, " const_{}: dq {}", c.name, v)?;
} else if let Some(_v) = &c.value_s {
todo!();
} else {
unreachable!();
}
}
writeln!(writer, "segment .bss")?;
for s in memories {
writeln!(writer, " mem_{}: resb {}", s.id, s.size)?;
}
writeln!(writer, " ret_stack: resq 256")?;
// for t in tokens {
// println!("{t:?}");
// }
writer.flush()?;
pre_compile_steps(
String::from_utf8_lossy(writer.buffer()).to_string().as_str(),
functions
)?;
linux_x86_64_compile_and_link(&of_a, &of_o, &of_c, args.quiet)?;
if args.run {
let c = linux_x86_64_run(&of_c, &[], args.quiet)?;
return Ok(c);
}
Ok(0)
}
fn pre_compile_steps(_code: &str, functions: Vec<Function>) -> Result<()> {
let mut has_main = false;
for func in functions {
if func.name == "main" {
has_main = true;
}
}
if !has_main {
crate::errors::missing_main_fn();
return Err(eyre!(""));
}
Ok(())
}