From fe8e9da774984799037e5fec6d70ce5d8385c05d Mon Sep 17 00:00:00 2001 From: MCorange99 Date: Sat, 23 Mar 2024 02:28:20 +0200 Subject: [PATCH] something --- include/linux/io.mcl | 6 +- src/compiler/x86_64_linux_nasm/mod.rs | 93 +++++---- src/lexer/mod.rs | 207 ++++++++++++++------ src/parser/mod.rs | 266 ++++++++++++++++++++++---- src/parser/precompiler.rs | 53 ++++- src/parser/utils.rs | 7 +- src/types/ast/mod.rs | 60 +++++- src/types/token/mod.rs | 62 +++++- test.mcl | 56 +++--- 9 files changed, 633 insertions(+), 177 deletions(-) diff --git a/include/linux/io.mcl b/include/linux/io.mcl index de2f942..8dddb95 100644 --- a/include/linux/io.mcl +++ b/include/linux/io.mcl @@ -1,14 +1,14 @@ -fn fwrite with int ptr int returns int then +fn fwrite with u64 ptr u64 returns u64 then SYS_write syscall3 done -fn puts with int ptr int returns int then +fn puts with u64 ptr u64 returns u64 then STDOUT fwrite drop done -fn eputs with int ptr int returns int then +fn eputs with u64 ptr u64 returns u64 then STDERR fwrite drop done \ No newline at end of file diff --git a/src/compiler/x86_64_linux_nasm/mod.rs b/src/compiler/x86_64_linux_nasm/mod.rs index 4de3e93..4932d7f 100644 --- a/src/compiler/x86_64_linux_nasm/mod.rs +++ b/src/compiler/x86_64_linux_nasm/mod.rs @@ -3,7 +3,7 @@ mod utils; use std::path::PathBuf; use std::{fs::File, io::BufWriter, path::Path}; use std::io::Write; -use crate::types::ast::{AstNode, Function, Module, Program}; +use crate::types::ast::{AstNode, EscIdent, Function, MemSize, Module, Program}; use crate::types::token::{InstructionType, Token, TokenType}; use super::utils::run_cmd; @@ -14,7 +14,8 @@ use super::Compiler; pub struct X86_64LinuxNasmCompiler { strings: Vec, - func_mem_i: Vec, + // func_mem_i: Vec, + // func_mem_list: HashMap, if_i: usize, while_i: usize, used_consts: Vec @@ -272,13 +273,10 @@ impl X86_64LinuxNasmCompiler { InstructionType::CastPtr | InstructionType::CastInt | InstructionType::CastVoid => (), //? Possibly have a use for this - InstructionType::TypeBool | - InstructionType::TypePtr | - InstructionType::TypeInt | - InstructionType::TypeVoid | - InstructionType::TypeAny | InstructionType::FnCall | InstructionType::MemUse | + InstructionType::StructPath(_) | + InstructionType::StructItem(_) | InstructionType::ConstUse => unreachable!(), InstructionType::Return => { writeln!(fd, " sub rbp, 8")?; @@ -289,29 +287,26 @@ impl X86_64LinuxNasmCompiler { } }, TokenType::Keyword(_) | + TokenType::Type(_) | TokenType::Unknown(_) => unreachable!(), } Ok(()) } fn handle_module(&mut self, fd: &mut BufWriter, prog: &Program, module: &Module) -> anyhow::Result<()> { - writeln!(fd, "; {} Module {} START", module.path.join("::"), module.ident)?; + writeln!(fd, "; {} Module START", module.path.join("::"))?; self.handle_ast_list(fd, prog, module.body.clone())?; - writeln!(fd, "; {} Module {} END", module.path.join("::"), module.ident)?; + writeln!(fd, "; {} Module END", module.path.join("::"))?; Ok(()) } fn handle_function(&mut self, fd: &mut BufWriter, prog: &Program, func: &Function) -> anyhow::Result<()> { - writeln!(fd, "{f}: ; fn {f}", f=func.ident)?; - writeln!(fd, " pop rbx")?; - writeln!(fd, " mov qword [rbp], rbx")?; - writeln!(fd, " add rbp, 8")?; + writeln!(fd, "{f}: ; fn {f}", f=func.get_ident_escaped())?; + writeln!(fd, " fn_setup")?; self.handle_ast_list(fd, prog, func.body.clone())?; - writeln!(fd, " sub rbp, 8")?; - writeln!(fd, " mov rbx, qword [rbp]")?; - writeln!(fd, " push rbx")?; + writeln!(fd, " fn_cleanup")?; writeln!(fd, " ret")?; Ok(()) } @@ -355,21 +350,27 @@ impl X86_64LinuxNasmCompiler { writeln!(fd, "; WHILE({id}) END")?; }, AstNode::Module(m) => self.handle_module(fd, prog, m)?, - AstNode::Memory(m) => { - if !m.statc { - todo!() - } + AstNode::Memory(_) => { + //? Possibly allow stack based allocation somehow + // if !m.statc { + // todo!() + // } }, - AstNode::MemUse(_) => { - + AstNode::MemUse(m) => { + let tmp = if let Some(disp) = m.disp { + format!("+{disp}") + } else { + String::new() + }; + writeln!(fd, " push m_{}{}", m.get_ident_escaped(), tmp)?; }, AstNode::ConstUse(c) => { - self.used_consts.push(c.ident.clone()); - writeln!(fd, " mov rax, qword [c_{}]", c.ident)?; + self.used_consts.push(c.get_ident_escaped()); + writeln!(fd, " mov rax, qword [c_{}]", c.get_ident_escaped())?; writeln!(fd, " push rax")?; }, AstNode::FnCall(f)=> { - writeln!(fd, " call {f} ; FUNCTIONCALL({f:?})", f=f.ident)?; + writeln!(fd, " call {f} ; FUNCTIONCALL({f:?})", f=f.get_ident_escaped())?; }, AstNode::Block(b)=> { writeln!(fd, "; BLOCK({}) START", b.comment)?; @@ -381,6 +382,11 @@ impl X86_64LinuxNasmCompiler { AstNode::Str(_, _) | AstNode::CStr(_, _) | AstNode::Char(_, _) => unreachable!(), + AstNode::StructDef(_) => (), + AstNode::StructDispPush { disp, ident, .. } => { + writeln!(fd, " mov rax, {} ; STRUCTDISPPUSH({})", disp, ident)?; + writeln!(fd, " push rax")?; + }, } } Ok(()) @@ -395,6 +401,8 @@ impl Compiler for X86_64LinuxNasmCompiler { used_consts: Vec::new(), if_i: 0, while_i: 0, + // func_mem_i: Vec::new(), + // func_mem_list: HashMap::new(), } } @@ -402,6 +410,18 @@ impl Compiler for X86_64LinuxNasmCompiler { writeln!(fd, "BITS 64")?; writeln!(fd, "segment .text")?; + + writeln!(fd, "%macro fn_setup 0")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " mov qword [rbp], rbx")?; + writeln!(fd, " add rbp, 8")?; + writeln!(fd, "%endmacro")?; + writeln!(fd, "%macro fn_cleanup 0")?; + writeln!(fd, " sub rbp, 8")?; + writeln!(fd, " mov rbx, qword [rbp]")?; + writeln!(fd, " push rbx")?; + writeln!(fd, "%endmacro")?; + writeln!(fd, "{}", utils::DBG_PRINT)?; writeln!(fd, "global _start")?; writeln!(fd, "_start:")?; @@ -425,22 +445,22 @@ impl Compiler for X86_64LinuxNasmCompiler { writeln!(fd, "segment .data")?; for (_, v) in prog.constants.iter() { - if !self.used_consts.contains(&v.ident) { + if !self.used_consts.contains(&v.get_ident_escaped()) { continue; } match Box::leak(v.value.clone()) { AstNode::Int(_, val) => { - writeln!(fd, "c_{}: dq {}", v.ident, val)?; + writeln!(fd, " c_{}: dq {}", v.get_ident_escaped(), val)?; } AstNode::Str(_, val) | AstNode::CStr(_, val) => { let s_chars = val.chars().map(|c| (c as u32).to_string()).collect::>(); let s_list = s_chars.join(","); - writeln!(fd, "c_{}: db {} ; {}", v.ident, s_list, val.escape_debug())?; + writeln!(fd, " c_{}: db {} ; {}", v.get_ident_escaped(), s_list, val.escape_debug())?; } AstNode::Char(_, val) => { - writeln!(fd, "c_{}: db {} ; '{}'", v.ident, *val as u8, val)?; + writeln!(fd, " c_{}: db {} ; '{}'", v.get_ident_escaped(), *val as u8, val)?; } c => panic!("{c:?}") }; @@ -449,12 +469,21 @@ impl Compiler for X86_64LinuxNasmCompiler { for (i, s) in self.strings.iter().enumerate() { let s_chars = s.chars().map(|c| (c as u32).to_string()).collect::>(); let s_list = s_chars.join(","); - writeln!(fd, "str_{i}: db {} ; STRDEF({})", s_list, s.escape_debug())?; + writeln!(fd, " str_{i}: db {} ; STRDEF({})", s_list, s.escape_debug())?; } writeln!(fd, "segment .bss")?; - writeln!(fd, "ret_stack: resq 256")?; + writeln!(fd, " ret_stack: resq 256")?; - //TODO: Memories + for (_, v) in &prog.memories { + match &v.size { + MemSize::Size(s) => { + writeln!(fd, " m_{}: resb {}", v.get_ident_escaped(), s)?; + }, + MemSize::Type(tt) => { + writeln!(fd, " m_{}: resb {} ; {:?}", v.get_ident_escaped(), tt.get_size(), tt)?; + }, + } + } Ok(()) diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index ce75198..3124647 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1,7 +1,7 @@ use std::path::Path; use anyhow::bail; -use crate::{error, types::{common::Loc, token::{InstructionType, KeywordType, Token, TokenType}}}; +use crate::{error, types::{common::Loc, token::{InstructionType, KeywordType, Token, TokenType, TypeType}}}; @@ -135,6 +135,88 @@ impl Lexer { self.tokens.push(Token::new(TokenType::Instruction(InstructionType::PushStr(str)), self.loc(), buf.clone())); buf.clear(); } + ':' if chars.get(idx + 1) == Some(&':') => { + let mut p_buf = vec![buf.clone()]; + buf.clear(); + idx += 2; // skip :: + self.loc.col += 2; + + while idx < chars.len() { + match chars[idx] { + ' ' | '\n' | '\r' => { + if !p_buf.is_empty() { + p_buf.push(buf.clone()); + } + + self.tokens.push(Token::new(TokenType::Instruction(InstructionType::StructPath(p_buf.clone())), start_loc.clone(), p_buf.clone().join("::"))); + buf.clear(); + break; + } + c @ ('\'' | '"') => { + error!({loc => self.loc()}, "Invalid char in struct path token, expected /a-z|A-Z|0-9|_|-/ got {c}"); + bail!("") + } + + ':' if chars.get(idx + 1) == Some(&':') => { + if buf.is_empty() { + error!({loc => self.loc()}, "Invalid char in struct path token, expected /a-z|A-Z|0-9|_|-/ got '.'"); + bail!("") + } + idx += 2; // skip :: + self.loc.col += 2; + p_buf.push(buf.clone()); + buf.clear(); + } + + c => { + buf.push(c); + idx += 1; + self.loc.inc_col(); + } + } + } + } + + '.' if !buf.is_empty() => { + let mut p_buf = vec![buf.clone()]; + buf.clear(); + idx += 1; // skip . + self.loc.inc_col(); + + while idx < chars.len() { + match chars[idx] { + ' ' | '\n' | '\r' => { + if !p_buf.is_empty() { + p_buf.push(buf.clone()); + } + self.tokens.push(Token::new(TokenType::Instruction(InstructionType::StructItem(p_buf.clone())), start_loc.clone(), p_buf.clone().join("."))); + buf.clear(); + break; + } + c @ ('\'' | '"') => { + error!({loc => self.loc()}, "Invalid char in struct access token, expected /a-z|A-Z|0-9|_|-/ got {c}"); + bail!("") + } + + '.' => { + if buf.is_empty() { + error!({loc => self.loc()}, "Invalid char in struct access token, expected /a-z|A-Z|0-9|_|-/ got '.'"); + bail!("") + } + idx += 1; // skip . + self.loc.col += 1; + p_buf.push(buf.clone()); + buf.clear(); + } + + c => { + buf.push(c); + idx += 1; + self.loc.inc_col(); + } + } + } + } ch @ (' ' | '\n' | '\r') => { if ch == '\n' { @@ -219,66 +301,69 @@ impl Lexer { fn match_token_type(&self, s: &str) -> TokenType { match s { - "if" => TokenType::Keyword(KeywordType::If), - "else" => TokenType::Keyword(KeywordType::Else), - "end" => TokenType::Keyword(KeywordType::End), - "while" => TokenType::Keyword(KeywordType::While), - "do" => TokenType::Keyword(KeywordType::Do), - "include" => TokenType::Keyword(KeywordType::Include), - "memory" => TokenType::Keyword(KeywordType::Memory), - "const" => TokenType::Keyword(KeywordType::Constant), - "fn" => TokenType::Keyword(KeywordType::Function), - "then" => TokenType::Keyword(KeywordType::Then), - "done" => TokenType::Keyword(KeywordType::Done), - "struct" => TokenType::Keyword(KeywordType::Struct), - "inline" => TokenType::Keyword(KeywordType::Inline), - "export" => TokenType::Keyword(KeywordType::Export), - "extern" => TokenType::Keyword(KeywordType::Extern), - "returns" => TokenType::Keyword(KeywordType::Returns), - "with" => TokenType::Keyword(KeywordType::With), - "drop" => TokenType::Instruction(InstructionType::Drop), - "_dbg_print"=> TokenType::Instruction(InstructionType::Print), - "dup" => TokenType::Instruction(InstructionType::Dup), - "rot" => TokenType::Instruction(InstructionType::Rot), - "over" => TokenType::Instruction(InstructionType::Over), - "swap" => TokenType::Instruction(InstructionType::Swap), - "sub" => TokenType::Instruction(InstructionType::Minus), - "add" => TokenType::Instruction(InstructionType::Plus), - "eq" => TokenType::Instruction(InstructionType::Equals), - "gt" => TokenType::Instruction(InstructionType::Gt), - "lt" => TokenType::Instruction(InstructionType::Lt), - "ge" => TokenType::Instruction(InstructionType::Ge), - "le" => TokenType::Instruction(InstructionType::Le), - "neq" => TokenType::Instruction(InstructionType::NotEquals), - "band" => TokenType::Instruction(InstructionType::Band), - "bor" => TokenType::Instruction(InstructionType::Bor), - "shr" => TokenType::Instruction(InstructionType::Shr), - "shl" => TokenType::Instruction(InstructionType::Shl), - "divmod" => TokenType::Instruction(InstructionType::DivMod), - "mul" => TokenType::Instruction(InstructionType::Mul), - "read8" => TokenType::Instruction(InstructionType::Read8), - "write8" => TokenType::Instruction(InstructionType::Write8), - "read32" => TokenType::Instruction(InstructionType::Read32), - "write32" => TokenType::Instruction(InstructionType::Write32), - "read64" => TokenType::Instruction(InstructionType::Read64), - "write64" => TokenType::Instruction(InstructionType::Write64), - "syscall0" => TokenType::Instruction(InstructionType::Syscall0), - "syscall1" => TokenType::Instruction(InstructionType::Syscall1), - "syscall2" => TokenType::Instruction(InstructionType::Syscall2), - "syscall3" => TokenType::Instruction(InstructionType::Syscall3), - "syscall4" => TokenType::Instruction(InstructionType::Syscall4), - "syscall5" => TokenType::Instruction(InstructionType::Syscall5), - "syscall6" => TokenType::Instruction(InstructionType::Syscall6), - "(bool)" => TokenType::Instruction(InstructionType::CastBool), - "(ptr)" => TokenType::Instruction(InstructionType::CastPtr), - "(int)" => TokenType::Instruction(InstructionType::CastInt), - "(void)" => TokenType::Instruction(InstructionType::CastVoid), - "bool" => TokenType::Instruction(InstructionType::TypeBool), - "ptr" => TokenType::Instruction(InstructionType::TypePtr), - "int" => TokenType::Instruction(InstructionType::TypeInt), - "void" => TokenType::Instruction(InstructionType::TypeVoid), - "any" => TokenType::Instruction(InstructionType::TypeAny), - "return" => TokenType::Instruction(InstructionType::Return), + "if" => TokenType::Keyword(KeywordType::If), + "else" => TokenType::Keyword(KeywordType::Else), + "end" => TokenType::Keyword(KeywordType::End), + "while" => TokenType::Keyword(KeywordType::While), + "do" => TokenType::Keyword(KeywordType::Do), + "include" => TokenType::Keyword(KeywordType::Include), + "memory" => TokenType::Keyword(KeywordType::Memory), + "const" => TokenType::Keyword(KeywordType::Constant), + "fn" => TokenType::Keyword(KeywordType::Function), + "then" => TokenType::Keyword(KeywordType::Then), + "done" => TokenType::Keyword(KeywordType::Done), + "typedef" => TokenType::Keyword(KeywordType::TypeDef), + "structdef" => TokenType::Keyword(KeywordType::StructDef), + "inline" => TokenType::Keyword(KeywordType::Inline), + "export" => TokenType::Keyword(KeywordType::Export), + "extern" => TokenType::Keyword(KeywordType::Extern), + "returns" => TokenType::Keyword(KeywordType::Returns), + "with" => TokenType::Keyword(KeywordType::With), + "drop" => TokenType::Instruction(InstructionType::Drop), + "_dbg_print" => TokenType::Instruction(InstructionType::Print), + "dup" => TokenType::Instruction(InstructionType::Dup), + "rot" => TokenType::Instruction(InstructionType::Rot), + "over" => TokenType::Instruction(InstructionType::Over), + "swap" => TokenType::Instruction(InstructionType::Swap), + "sub" => TokenType::Instruction(InstructionType::Minus), + "add" => TokenType::Instruction(InstructionType::Plus), + "eq" => TokenType::Instruction(InstructionType::Equals), + "gt" => TokenType::Instruction(InstructionType::Gt), + "lt" => TokenType::Instruction(InstructionType::Lt), + "ge" => TokenType::Instruction(InstructionType::Ge), + "le" => TokenType::Instruction(InstructionType::Le), + "neq" => TokenType::Instruction(InstructionType::NotEquals), + "band" => TokenType::Instruction(InstructionType::Band), + "bor" => TokenType::Instruction(InstructionType::Bor), + "shr" => TokenType::Instruction(InstructionType::Shr), + "shl" => TokenType::Instruction(InstructionType::Shl), + "divmod" => TokenType::Instruction(InstructionType::DivMod), + "mul" => TokenType::Instruction(InstructionType::Mul), + "read8" => TokenType::Instruction(InstructionType::Read8), + "write8" => TokenType::Instruction(InstructionType::Write8), + "read32" => TokenType::Instruction(InstructionType::Read32), + "write32" => TokenType::Instruction(InstructionType::Write32), + "read64" => TokenType::Instruction(InstructionType::Read64), + "write64" => TokenType::Instruction(InstructionType::Write64), + "syscall0" => TokenType::Instruction(InstructionType::Syscall0), + "syscall1" => TokenType::Instruction(InstructionType::Syscall1), + "syscall2" => TokenType::Instruction(InstructionType::Syscall2), + "syscall3" => TokenType::Instruction(InstructionType::Syscall3), + "syscall4" => TokenType::Instruction(InstructionType::Syscall4), + "syscall5" => TokenType::Instruction(InstructionType::Syscall5), + "syscall6" => TokenType::Instruction(InstructionType::Syscall6), + "(bool)" => TokenType::Instruction(InstructionType::CastBool), + "(ptr)" => TokenType::Instruction(InstructionType::CastPtr), + "(int)" => TokenType::Instruction(InstructionType::CastInt), + "(void)" => TokenType::Instruction(InstructionType::CastVoid), + "return" => TokenType::Instruction(InstructionType::Return), + "ptr" => TokenType::Type(TypeType::Ptr), + "u8" => TokenType::Type(TypeType::U8), + "u16" => TokenType::Type(TypeType::U16), + "u32" => TokenType::Type(TypeType::U32), + "u64" => TokenType::Type(TypeType::U64), + "void" => TokenType::Type(TypeType::Void), + "any" => TokenType::Type(TypeType::Any), t => TokenType::Unknown(t.to_string()) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 703d2bb..e726310 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6,7 +6,7 @@ use std::{collections::HashMap, path::Path}; use anyhow::{bail, Result}; -use crate::{cli::CliArgs, lexer::Lexer, types::{ast::{AstNode, Block, ConstUse, Constant, FnCall, Function, If, MemUse, Memory, Module, Program, While}, common::Loc, token::{InstructionType, KeywordType, Token, TokenType}}}; +use crate::{cli::CliArgs, lexer::Lexer, types::{ast::{AstNode, Block, ConstUse, Constant, FnCall, Function, If, MemSize, MemUse, Memory, Module, Program, StructDef, While}, common::Loc, token::{InstructionType, KeywordType, Token, TokenType, TypeType}}}; use self::{builtin::get_builtin_symbols, precompiler::{precompile_const, precompile_mem}, utils::{expect, peek_check, peek_check_multiple, PeekResult}}; @@ -16,6 +16,7 @@ bitflags::bitflags! { const EXTERN = 1 << 0; const EXPORT = 1 << 1; const INLINE = 1 << 2; + const ALLOW_TYPES = 1 << 3; } } @@ -35,7 +36,7 @@ pub fn parse(cli_args: &CliArgs, tokens: &mut Vec) -> Result { functions: HashMap::new(), constants: HashMap::new(), memories: HashMap::new(), - + struct_defs: HashMap::new() }; let syms = get_builtin_symbols(&mut prog); @@ -67,16 +68,17 @@ fn parse_next(cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec, f let ret = match &token.typ { TokenType::Keyword(kw) => { match kw { - KeywordType::If => parse_if(&token, cli_args, prog, tokens)?, - KeywordType::While => parse_while(&token, cli_args, prog, tokens)?, - KeywordType::Include => parse_include(&token, cli_args, prog, tokens)?, - KeywordType::Memory => parse_memory(&token, cli_args, prog, tokens, is_module_root)?, - KeywordType::Constant => parse_const(&token, cli_args, prog, tokens)?, - KeywordType::Function => parse_function(&token, cli_args, prog, tokens, flags)?, - KeywordType::Struct => todo!(), - KeywordType::Inline => parse_inline(&token, cli_args, prog, tokens, flags)?, - KeywordType::Export => parse_export(&token, cli_args, prog, tokens, flags)?, - KeywordType::Extern => parse_extern(&token, cli_args, prog, tokens, flags)?, + KeywordType::If => parse_if(&token, cli_args, prog, tokens)?, + KeywordType::While => parse_while(&token, cli_args, prog, tokens)?, + KeywordType::Include => parse_include(&token, cli_args, prog, tokens)?, + KeywordType::Memory => parse_memory(&token, cli_args, prog, tokens, is_module_root)?, + KeywordType::Constant => parse_const(&token, cli_args, prog, tokens)?, + KeywordType::Function => parse_function(&token, cli_args, prog, tokens, flags)?, + KeywordType::StructDef => parse_struct(&token, cli_args, prog, tokens)?, + KeywordType::TypeDef => todo!(), + KeywordType::Inline => parse_inline(&token, cli_args, prog, tokens, flags)?, + KeywordType::Export => parse_export(&token, cli_args, prog, tokens, flags)?, + KeywordType::Extern => parse_extern(&token, cli_args, prog, tokens, flags)?, kw => { dbg!(&prog.constants); error!({loc => token.loc}, "Unexpected token {kw:?}"); @@ -89,7 +91,11 @@ fn parse_next(cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec, f error!({loc => token.loc}, "Unexpected token {it:?}, please create a main function, this is not a scripting language"); bail!("") } else { - AstNode::Token(token) + match it { + InstructionType::StructPath(p) => parse_struct_path(&token, prog, p)?, + InstructionType::StructItem(p) => parse_struct_item(&token, prog, p)?, + _ => AstNode::Token(token) + } } }, TokenType::Unknown(ut) => { @@ -101,10 +107,162 @@ fn parse_next(cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec, f parse_unknown(&token, cli_args, prog, tokens, flags)? } }, + TokenType::Type(t) => { + if flags.contains(Flags::ALLOW_TYPES) { + AstNode::Token(token) + } else { + error!({loc => token.loc}, "Unexpected type {t:?}"); + bail!("") + } + }, }; Ok(ret) } +fn parse_struct_item(org: &Token, prog: &mut Program, p: &Vec) -> Result { + fn find_disp(strct: &StructDef, disp: &mut usize, path: &[String]) { + let Some(p) = path.get(0) else { + return + }; + + for item in &strct.body { + if p == &item.0 { + match &item.2 { + TypeType::Struct(strct) => { + *disp += item.1; + find_disp(strct, disp, &path[1..]) + }, + _ => { + *disp += item.1; + } + } + } + } + + } + if let Some(mem) = prog.memories.get(&p[0].to_string()) { + match &mem.size { + MemSize::Size(_) => { + error!({loc => org.loc()}, "You can only access items in structs"); + bail!("") + }, + MemSize::Type(t) => { + match t { + TypeType::Struct(s) => { + + let mut disp = 0; + find_disp(&s, &mut disp, &p[1..]); + return Ok(AstNode::MemUse(MemUse{ + ident: p[0].clone(), + loc: org.loc(), + disp: Some(disp) + })); + }, + _ => { + error!({loc => org.loc()}, "You can only access items in structs"); + bail!("") + } + } + }, + } + } + + error!("Failed to find memory {}", p[0]); + bail!("") +} + +fn parse_struct_path(org: &Token, prog: &mut Program, p: &Vec) -> Result { + + fn find_disp(strct: &StructDef, disp: &mut usize, path: &[String]) { + let Some(p) = path.get(0) else { + return + }; + + for item in &strct.body { + if p == &item.0 { + match &item.2 { + TypeType::Struct(strct) => { + *disp += item.1; + find_disp(strct, disp, &path[1..]) + }, + _ => { + *disp += item.1; + } + } + } + } + + } + let mut disp = 0; + if let Some(strct) = prog.struct_defs.get(&p[0].to_string()) { + find_disp(strct, &mut disp, &p[1..]); + return Ok(AstNode::StructDispPush{ + ident: org.lexem.clone(), + loc: org.loc(), + disp + }); + } + + error!("Failed to find struct {}", p[0]); + bail!("") +} + +fn parse_struct(org: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec) -> Result { + let ident = expect(tokens, TokenType::Unknown(String::new()))?; + expect(tokens, TokenType::Keyword(KeywordType::Do))?; + + + let mut body: Vec<(String, usize, TypeType)> = Vec::new(); + let mut size = 0; + + loop { + let ident = expect(tokens, TokenType::Unknown(String::new()))?; + expect(tokens, TokenType::Keyword(KeywordType::Do))?; + let typ = parse_next(cli_args, prog, tokens, Flags::ALLOW_TYPES, false)?; + let (typ, disp) = match &typ { + AstNode::Token(t) => { + match &t.typ { + TokenType::Type(t) => { + let disp = size; + size += t.get_size(); + (t, disp) + } + _ => { + error!({loc => t.loc()}, "Expected type, got {t:?}"); + bail!("") + } + } + }, + t => { + error!({loc => typ.loc()}, "Expected type, got {t:?}"); + bail!("") + } + }; + expect(tokens, TokenType::Keyword(KeywordType::End))?; + + body.push((ident.lexem, disp, typ.clone())); + + if peek_check(tokens, TokenType::Keyword(KeywordType::Done)).correct(){ + tokens.pop(); + break; + } + // if peek_check(tokens, TokenType::Keyword(KeywordType::End)).correct() + }; + + + + let def = StructDef{ + loc: org.loc(), + ident: ident.lexem.clone(), + body, + size, + }; + + prog.struct_defs.insert(ident.lexem, def.clone()); + + Ok(AstNode::StructDef(def)) +} + fn parse_memory(org: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec, is_module_root: bool) -> Result { let name = expect(tokens, TokenType::Unknown(String::new()))?; @@ -118,7 +276,7 @@ fn parse_memory(org: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mu PeekResult::Wrong(_) => (), PeekResult::None => panic!("idk what to do herre"), } - body.push(parse_next(cli_args, prog, tokens, Flags::empty(), false)?); + body.push(parse_next(cli_args, prog, tokens, Flags::ALLOW_TYPES, false)?); } expect(tokens, TokenType::Keyword(KeywordType::End))?; @@ -152,13 +310,21 @@ fn parse_function(org: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: & loop { if let PeekResult::Correct(t) = peek_check_multiple(tokens, vec![ - TokenType::Instruction(InstructionType::TypeAny), - TokenType::Instruction(InstructionType::TypeBool), - TokenType::Instruction(InstructionType::TypeInt), - TokenType::Instruction(InstructionType::TypePtr), - TokenType::Instruction(InstructionType::TypeVoid), + TokenType::Type(TypeType::Any), + TokenType::Type(TypeType::U8), + TokenType::Type(TypeType::U16), + TokenType::Type(TypeType::U32), + TokenType::Type(TypeType::U64), + TokenType::Type(TypeType::Ptr), + TokenType::Type(TypeType::Void), + TokenType::Type(TypeType::Custom(Vec::new())), ]) { - args.push(t.typ.clone()); + match &t.typ { + TokenType::Type(tt) => { + args.push(tt.clone()); + } + _ => unreachable!() + } } else { break; } @@ -171,13 +337,21 @@ fn parse_function(org: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: & loop { if let PeekResult::Correct(t) = peek_check_multiple(tokens, vec![ - TokenType::Instruction(InstructionType::TypeAny), - TokenType::Instruction(InstructionType::TypeBool), - TokenType::Instruction(InstructionType::TypeInt), - TokenType::Instruction(InstructionType::TypePtr), - TokenType::Instruction(InstructionType::TypeVoid), + TokenType::Type(TypeType::Any), + TokenType::Type(TypeType::U8), + TokenType::Type(TypeType::U16), + TokenType::Type(TypeType::U32), + TokenType::Type(TypeType::U64), + TokenType::Type(TypeType::Ptr), + TokenType::Type(TypeType::Void), + TokenType::Type(TypeType::Custom(Vec::new())), ]) { - ret_args.push(t.typ.clone()); + match &t.typ { + TokenType::Type(tt) => { + ret_args.push(tt.clone()); + } + _ => unreachable!() + } } else { break; } @@ -235,7 +409,7 @@ fn parse_if(org: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut Ve PeekResult::Wrong(w) => { match w.typ { TokenType::Keyword(KeywordType::Then) => { - warn!("If is defined as `if ... do ... done`"); + warn!({loc => w.loc()}, "If is defined as `if ... do ... done`"); } _ => () } @@ -502,6 +676,7 @@ fn parse_include(_: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut functions: prog.functions.clone(), constants: prog.constants.clone(), memories: prog.memories.clone(), + struct_defs: prog.struct_defs.clone(), }; @@ -563,7 +738,7 @@ fn parse_const(org: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut fn parse_unknown(org: &Token, _: &CliArgs, prog: &mut Program, _: &mut Vec, _: Flags ) -> Result { //TODO: Typing? - if let Some(func) = prog.functions.get(&org.lexem) { + if let Some(func) = prog.functions.get(&org.lexem.clone()) { if func.inline { return Ok(AstNode::Block(Block{ loc: org.loc.clone(), body: func.body.clone(), comment: format!("inline fn {}", func.ident) })) } else { @@ -571,15 +746,42 @@ fn parse_unknown(org: &Token, _: &CliArgs, prog: &mut Program, _: &mut Vec>(); + // dbg!(prog.struct_defs.clone()); + // if let Some(t) = prog.struct_defs.get(&pth[0].to_string()) { + // if let Some(i) = t.body.iter().find(|i| i.0 == pth[1].to_string()) { + // return Ok(AstNode::StructDispPush{ + // ident: org.lexem.clone(), + // loc: org.loc(), + // disp: i.1 + // }); + + // } + // } + // } + + + // dbg!(&prog.constants); + debug!({loc => org.loc.clone()}, "Unknown token"); error!({loc => org.loc.clone()}, "Unknown token {:?}", org); bail!("") } \ No newline at end of file diff --git a/src/parser/precompiler.rs b/src/parser/precompiler.rs index 8c0b163..c8fd8e4 100644 --- a/src/parser/precompiler.rs +++ b/src/parser/precompiler.rs @@ -1,17 +1,56 @@ use anyhow::bail; -use crate::types::{ast::{AstNode, Program}, common::Loc, token::{InstructionType, TokenType}}; +use crate::types::{ast::{AstNode, MemSize, Program}, common::Loc, token::{InstructionType, TokenType, TypeType}}; -pub fn precompile_mem(prog: &Program, ast: Vec ) -> anyhow::Result { +pub fn precompile_mem(prog: &Program, ast: Vec ) -> anyhow::Result { + match &ast[0] { + AstNode::Token(t) => { + match &t.typ { + TokenType::Type(_) => { + let mut buf = vec![]; + let mut i = 0; + while ast.len() > i { + match &ast[i] { + AstNode::Token(t) => { + match &t.typ { + TokenType::Type(t) => { + match t { + TypeType::Struct(s) => { + return Ok(MemSize::Type(TypeType::Struct(s.clone()))); + }, + _ => () + } + buf.push(t.clone()); + i += 1; + } + _ => { + error!({loc => t.loc()}, "Cannot use a type and a number as a memory size at the same time"); + bail!("") + } + } + }, + _ => { + error!({loc => t.loc()}, "Cannot use a type and a number as a memory size at the same time"); + bail!("") + } + } + } + return Ok(MemSize::Type(TypeType::Custom(buf))); + } + _ => () + } + }, + _ => (), + } match precompile_const(prog, ast, &mut Vec::new()) { Ok(v) => { match v { AstNode::Int(_, i) => { - return Ok(i) + return Ok(MemSize::Size(i)) } _ => { - error!("memories can only have numbers or types in their size"); + error!({loc => v.loc()}, "Can only have a type or a number as a memory size"); bail!("") } } @@ -142,7 +181,11 @@ pub fn precompile_const(prog: &Program, ast: Vec, stack: &mut Vec todo!(), + TokenType::Unknown(_) => unreachable!(), + TokenType::Type(_) => { + error!({loc => t.loc()}, "Cannot use a type and a number as a memory size at the same time"); + bail!("") + }, } }, //TODO: Implement these diff --git a/src/parser/utils.rs b/src/parser/utils.rs index 9b08b88..484cac2 100644 --- a/src/parser/utils.rs +++ b/src/parser/utils.rs @@ -41,6 +41,9 @@ pub fn cmp(lhs: &TokenType, rhs: &TokenType) -> bool { (TokenType::Instruction(lhs), TokenType::Instruction(rhs)) => { std::mem::discriminant(lhs) == std::mem::discriminant(rhs) }, + (TokenType::Type(lhs), TokenType::Type(rhs)) => { + std::mem::discriminant(lhs) == std::mem::discriminant(rhs) + }, (TokenType::Unknown(_), TokenType::Unknown(_)) => true, _ => false } @@ -50,7 +53,7 @@ pub fn peek_check_multiple(tokens: &Vec, typs: Vec) -> PeekRes let t = tokens.last(); if let Some(t) = t { - for tt in typs { + for tt in typs.clone() { if cmp(&t.typ, &tt) { return PeekResult::Correct(t); } @@ -86,7 +89,7 @@ pub fn expect(tokens: &mut Vec, typ: TokenType) -> Result { Some(t) => { //? Source: https://doc.rust-lang.org/std/mem/fn.discriminant.html if std::mem::discriminant(&t.typ) != std::mem::discriminant(&typ) { - error!("Expected {:?}, but got {:?}", typ, t.typ); + error!({loc => t.loc()}, "Expected {:?}, but got {:?}", typ, t.typ); bail!("") } Ok(t) diff --git a/src/types/ast/mod.rs b/src/types/ast/mod.rs index 5044b99..204c120 100644 --- a/src/types/ast/mod.rs +++ b/src/types/ast/mod.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use super::{common::Loc, token::{Token, TokenType}}; +use super::{common::Loc, token::{Token, TypeType}}; //TODO: Implement missing stuff @@ -23,17 +23,23 @@ pub enum AstNode { // ident: String, // value: InstructionType // }, - // Struct{ - // loc: Loc, - // ident: String, - // body: Vec<(String, usize)> // (field ident, size in bytes) - // }, // StructDef{ // loc: Loc, // extrn: bool, // ident: String, // body: Vec<(String, usize)> // (field ident, size in bytes) // }, + StructDef(StructDef), + StructDispPush{ + loc: Loc, + disp: usize, + ident: String, + }, + // StructItemPush{ + // loc: Loc, + // disp: usize, + // ident: String, + // }, If(If), While(While), Module(Module), @@ -63,14 +69,25 @@ impl AstNode { AstNode::Str(loc, _) => loc.clone(), AstNode::CStr(loc, _) => loc.clone(), AstNode::Char(loc, _) => loc.clone(), + AstNode::StructDef(s) => s.loc.clone(), + AstNode::StructDispPush { loc, ..} => loc.clone(), + // AstNode::StructItemPush { loc, .. } => loc.clone(), } } } +#[derive(Debug, Clone, PartialEq)] +pub struct StructDef { + pub loc: Loc, + pub ident: String, + pub body: Vec<(String, usize, TypeType)>, // (field ident, size in bytes) + pub size: usize +} #[derive(Debug, Clone)] pub struct MemUse { pub loc: Loc, pub ident: String, + pub disp: Option } #[derive(Debug, Clone)] pub struct ConstUse { @@ -119,8 +136,8 @@ pub struct Function { pub inline: bool, pub extrn: bool, pub export: bool, - pub arg_types: Vec, - pub ret_types: Vec, + pub arg_types: Vec, + pub ret_types: Vec, pub body: Vec } @@ -136,7 +153,7 @@ pub struct Memory { pub loc: Loc, pub ident: String, pub statc: bool, - pub size: usize // bytes + pub size: MemSize // bytes } @@ -146,6 +163,31 @@ pub struct Program { pub functions: HashMap, pub constants: HashMap, pub memories: HashMap, + pub struct_defs: HashMap, +} + +#[derive(Debug, Clone)] +pub enum MemSize { + Size(usize), + Type(TypeType) +} + +impl EscIdent for FnCall { + fn ident(&self) -> String { + self.ident.clone() + } +} + +impl EscIdent for ConstUse { + fn ident(&self) -> String { + self.ident.clone() + } +} + +impl EscIdent for MemUse { + fn ident(&self) -> String { + self.ident.clone() + } } impl EscIdent for Constant { diff --git a/src/types/token/mod.rs b/src/types/token/mod.rs index c757cf3..8b33b3a 100644 --- a/src/types/token/mod.rs +++ b/src/types/token/mod.rs @@ -1,6 +1,6 @@ #![allow(dead_code)] -use super::common::Loc; +use super::{ast::StructDef, common::Loc}; #[derive(Debug, Clone, PartialEq)] pub enum InstructionType { @@ -10,6 +10,8 @@ pub enum InstructionType { PushStr(String), PushCStr(String), PushChar(char), + StructPath(Vec), // foo::bar + StructItem(Vec), // foo.bar Drop, Print, Dup, @@ -55,14 +57,6 @@ pub enum InstructionType { CastPtr, CastInt, CastVoid, - - // typing - TypeBool, - TypePtr, - TypeInt, - TypeVoid, - // TypeStr, - TypeAny, FnCall, MemUse, @@ -83,7 +77,8 @@ pub enum KeywordType { Function, Then, Done, - Struct, + StructDef, + TypeDef, Inline, Export, Extern, @@ -91,9 +86,56 @@ pub enum KeywordType { With, } +#[derive(Clone, PartialEq)] +pub enum TypeType { + Ptr, + U8, + U16, + U32, + U64, + Void, + Any, + Custom(Vec), + Struct(StructDef) +} + +impl TypeType { + pub fn get_size(&self) -> usize { + match self { + TypeType::Ptr => std::mem::size_of::<*const ()>(), + TypeType::U8 => 1, + TypeType::U16 => 2, + TypeType::U32 => 4, + TypeType::U64 => 8, + TypeType::Void => 0, + TypeType::Any => 0, + TypeType::Custom(ts) => ts.iter().map(|f| f.get_size()).sum(), + TypeType::Struct(s) => s.size, + } + } +} + +impl std::fmt::Debug for TypeType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Ptr => write!(f, "Ptr"), + Self::U8 => write!(f, "U8"), + Self::U16 => write!(f, "U16"), + Self::U32 => write!(f, "U32"), + Self::U64 => write!(f, "U64"), + Self::Void => write!(f, "Void"), + Self::Any => write!(f, "Any"), + Self::Custom(arg0) => f.debug_tuple("Custom").field(arg0).finish(), + Self::Struct(arg0) => write!(f, "{} {}{:?}", arg0.size, arg0.ident, arg0.body), + } + } +} + + #[derive(Debug, Clone, PartialEq)] pub enum TokenType { Keyword(KeywordType), + Type(TypeType), Instruction(InstructionType), Unknown(String) } diff --git a/test.mcl b/test.mcl index 821db5e..a7131f2 100644 --- a/test.mcl +++ b/test.mcl @@ -1,11 +1,17 @@ include "std.mcl" -// structdef Foo do -// buz do sizeof(u64) end -// baz do sizeof(u64) end -// done +structdef Uwu do + owo do u64 end + twt do u64 end +done -memory s_foo sizeof(u32) end +structdef Foo do + buz do u64 end + uwu do Uwu end +done + + +memory s_foo Foo end //? Comments :3 @@ -15,25 +21,29 @@ memory s_foo sizeof(u32) end // fn putd with int returns void then drop done -fn main with int ptr returns int then - 1 2 add - 69 _dbg_print - "Hewo\n" puts +fn main with void returns void then - if 3 4 eq do - "omg what impossible!\n" - else if 1 1 eq do - "whaaaaaaaaa\n" - else - "finally, some good soup\n" - done - puts + s_foo.uwu.twt 69 write64 - 10 - while dup 0 gt do - "uwu " puts - dup _dbg_print - 1 sub - done + s_foo.uwu.twt read64 _dbg_print + // 1 2 add + // 69 _dbg_print + // "Hewo\n" puts + + // if 3 4 eq do + // "omg what impossible!\n" + // else if 1 1 eq do + // "whaaaaaaaaa\n" + // else + // "finally, some good soup\n" + // done + // puts + + // 10 + // while dup 0 gt do + // "uwu " puts + // dup _dbg_print + // 1 sub + // done done