From f338f07e7dbb45505309c929c28f3d4fc168a33e Mon Sep 17 00:00:00 2001 From: MCorange Date: Sun, 22 Dec 2024 02:53:21 +0200 Subject: [PATCH] Start of typechecking and other processing of ast after parsing --- src/main.rs | 3 +- src/parser/ast/mod.rs | 14 ++--- src/parser/mod.rs | 2 +- src/validator/mod.rs | 74 ++++++++++++++++++++++++-- src/validator/predefined.rs | 71 +++++++++++++++++++++++++ test.mcl | 101 +++++++----------------------------- 6 files changed, 169 insertions(+), 96 deletions(-) diff --git a/src/main.rs b/src/main.rs index c1fcefa..74258f1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -24,7 +24,8 @@ fn main() -> anyhow::Result<()> { info!("Parsing {file}"); let mut prog = mclangc::parser::parse_program(tokens)?; info!("Validating {file}"); - let validated = mclangc::validator::validate_code(&mut prog)?; + mclangc::validator::validate_code(&mut prog)?; + dbg!(&prog); } Ok(()) } diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 868c967..6fd4886 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -1,8 +1,8 @@ use std::collections::HashMap; -use statement::{Enum, Function, Struct, TypeAlias}; +use statement::{Enum, Function, Struct}; -use crate::common::loc::LocBox; +use crate::{common::loc::LocBox, validator::predefined::TypeType}; pub use crate::tokeniser::tokentype::*; pub mod expr; @@ -13,11 +13,11 @@ pub mod typ; #[derive(Debug, Clone)] pub struct Program { pub ast: expr::Block, - pub structs: HashMap, - pub enums: HashMap, - pub types: HashMap, - pub functions: HashMap, - pub member_functions: HashMap>, + pub structs: HashMap>, + pub enums: HashMap>, + pub types: HashMap, + pub functions: HashMap>, + pub member_functions: HashMap>>, } #[derive(Debug, Clone)] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8fe9755..3425305 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8,7 +8,7 @@ pub mod ast; mod expr; mod stat; mod utils; -mod typ; +pub mod typ; type Result = anyhow::Result; diff --git a/src/validator/mod.rs b/src/validator/mod.rs index 934612e..fd5c404 100644 --- a/src/validator/mod.rs +++ b/src/validator/mod.rs @@ -1,8 +1,74 @@ -use crate::parser::ast::Program; +use std::collections::HashMap; + +use crate::{common::loc::LocBox, parser::ast::{expr::Block, statement::{Statement, TypeAlias}, Ast, Program}}; + +pub mod predefined; + +pub fn validate_code(prog: &mut Program) -> anyhow::Result<()> { + let Block(items) = prog.ast.clone(); + predefined::load_builtin(prog); + collect_types(prog, &items); + //dbg!(&prog.types); + //dbg!(&prog.structs); + //dbg!(&prog.enums); + //dbg!(&prog.member_functions); + //dbg!(&prog.functions); + for item in items { + match item { + Ast::Statement(stat) => { + match stat.inner() { + Statement::Fn(func) => {} + Statement::Let { name, typ, val } => {} + Statement::ConstVar { name, typ, val } => {} + Statement::StaticVar { name, typ, val } => {} + Statement::Enum(enm) => {} + Statement::Struct(strct) => {} + Statement::TypeAlias(alias) => {} + } + } + Ast::Expr(_) => unreachable!() + } + } - -pub fn validate_code(prog: &Program) -> anyhow::Result<()> { - Ok(()) } + +fn collect_types(prog: &mut Program, items: &Vec) { + for item in items { + match item { + Ast::Statement(stat) => { + match stat.inner() { + Statement::Fn(func)=> { + if let Some(struct_name) = &func.struct_name { + if let Some(v) = prog.member_functions.get_mut(&struct_name) { + v.insert(func.name.clone(), LocBox::new(stat.loc(), func.clone())); + } else { + let mut v = HashMap::new(); + v.insert(func.name.clone(), LocBox::new(stat.loc(), func.clone())); + prog.member_functions.insert(struct_name.clone(), v); + } + } else { + prog.functions.insert(func.name.clone(), LocBox::new(stat.loc(), func.clone())); + } + } + Statement::Enum(enm) => { + prog.enums.insert(enm.name.clone(), LocBox::new(stat.loc(), enm.clone())); + } + Statement::Struct(strct) => { + prog.structs.insert(strct.name.clone(), LocBox::new(stat.loc(), strct.clone())); + } + Statement::TypeAlias(alias) => { + let typ = alias.clone().typ.inner().clone(); + prog.types.insert(alias.name.clone(), predefined::TypeType::Normal(LocBox::new(stat.loc(), typ))); + } + Statement::Let { .. } | + Statement::ConstVar { .. } | + Statement::StaticVar { .. } => (), + } + } + Ast::Expr(_) => unreachable!() + } + } + +} diff --git a/src/validator/predefined.rs b/src/validator/predefined.rs index e69de29..01ff06f 100644 --- a/src/validator/predefined.rs +++ b/src/validator/predefined.rs @@ -0,0 +1,71 @@ +use std::collections::HashMap; +use lazy_static::lazy_static; +use crate::common::Loc; +use crate::parser::typ::parse_type; +use crate::{common::loc::LocBox, parser::ast::{statement::Function, typ::Type, Ident, Program}}; + +#[cfg(target_arch="x86_64")] +const SIZE: usize = 8; +#[cfg(target_arch="x86")] +const SIZE: usize = 4; + +lazy_static!( + pub static ref TYPES_RAW: HashMap<&'static str, usize> = [ + ("void", 0), + ("usize", SIZE), + ("isize", SIZE), + ("u8", 1), + ("u16", 2), + ("u32", 4), + ("u64", 8), + ("i8", 1), + ("i16", 2), + ("i32", 4), + ("i64", 8), + ].into(); + pub static ref FUNCTIONS: HashMap<&'static str, (Vec<(&'static str, &'static str)>, &'static str)> = [ + ("syscall", (vec![ + ("arg_count", "&u8"), + ("sc_num", "usize"), + ("args", "&[&void]") + ], "usize")), + ].into(); +); + +#[derive(Debug, Clone)] +pub enum TypeType { + Normal(LocBox), + Builtin(usize), +} + +pub fn load_builtin(prog: &mut Program) { + for (name, size) in TYPES_RAW.iter() { + prog.types.insert(Ident(name.to_string()), TypeType::Builtin(*size)); + } + + for (name, (args, ret_typ)) in FUNCTIONS.iter() { + let mut params = Vec::new(); + let mut ret_type = None; + if ret_typ.len() > 0 { + let mut ret_t_tokens = crate::tokeniser::tokenise(&ret_typ, "(internal)").unwrap(); + let typ = parse_type(&mut ret_t_tokens).unwrap(); + ret_type = Some(LocBox::new(&Loc::default(), typ.inner().clone())); + } + for (name, typ) in args { + let mut tokens = crate::tokeniser::tokenise(&typ, "(internal)").unwrap(); + let typ = parse_type(&mut tokens).unwrap(); + params.push((Ident(name.to_string()), LocBox::new(&Loc::new("(internal)", 0, 0), typ.inner().clone()))); + } + + let f = Function { + struct_name: None, + name: Ident(name.to_string()), + params, + ret_type, + qual_const: false, + qual_extern: None, + body: None + }; + prog.functions.insert(Ident(name.to_string()), LocBox::new(&Loc::new("(internal)", 0, 0), f)); + } +} diff --git a/test.mcl b/test.mcl index 54a4b8f..815c2c3 100644 --- a/test.mcl +++ b/test.mcl @@ -1,86 +1,21 @@ - -enum Wah { - A, - B, - C, - D -} - -struct Baz { - owo: i32, - uwu: usize -} - -/// Type definitions -// type Rah = &Baz; - -/// Different kinds of functions -// Normal function -// fn main(a: &Foo, b: Rah) -> Nya { - /// General expressions (math) - // Works - // let a = 1 * 3 == 4; - // let b = 3/4 == *a; - // let c = (a->b.c->d) / 2; - // let d = 2 / a->b.c->d; - // let e = a->b.c->d / 2; - // let f = a.b.c.d / 2; - // let g = a.b[a.c] * 5; - - // No worky - // nothing! yay! - - /// Struct literals - // let a = Baz { - // owo: a, - // uwu: b + c / d - // }; - - /// If statement - // if 1 > 3 { - // ";3" - // } else - // if *a == 3 { - // ":0" - // } else { - // ">:(" - // } - - - /// 3 kinds of loops all doing the same thing - /// While loops - // let iw = 0; - // while iw < 10 { - // println("Owo"); - // } - - /// For loops - // for let ifr = 0 ; ifr < 20 ; ifr += 1 { - // println("nya"); - // } - - /// Infinite loops - // let il = 0; - // loop { - // if il > 10 { - // break; - // } - // println("Rah"); - // } - - /// Function Calls - // println(":3"); +//type str = [u8]; +// +//struct Foo { +// a: usize, +// b: &str +//} +// +//fn Foo.new(a: usize, b: &str) -> Foo { +// return Foo { +// a: a, +// b: b +// }; +//} +// +// +//fn main() { +// let obj = Foo::new(); +// //} -// Struct member function with inner data -//fn Baz.main(self: &mut Baz, a: &Foo, b: &mut Bar) -> &Nya; - -// Struct member function without any data a.k.a a static member func -//fn Baz.main(a: &Foo, b: &mut Bar) -> &Nya; - - - - - -