From 54b6df5862ea28999b76c7009127c0a7a32c07e7 Mon Sep 17 00:00:00 2001 From: MCorange Date: Sat, 21 Dec 2024 03:22:07 +0200 Subject: [PATCH] Initial --- .gitignore | 1 + Cargo.lock | 276 ++++++++++++++++++++++ Cargo.toml | 12 + src/bin/test/main.rs | 23 ++ src/cli.rs | 3 + src/common/loc.rs | 60 +++++ src/common/mod.rs | 2 + src/lib.rs | 7 + src/logger.rs | 110 +++++++++ src/main.rs | 11 + src/parser/ast/expr.rs | 103 ++++++++ src/parser/ast/literal.rs | 22 ++ src/parser/ast/mod.rs | 28 +++ src/parser/ast/statement.rs | 44 ++++ src/parser/ast/typ.rs | 17 ++ src/parser/expr.rs | 454 ++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 50 ++++ src/parser/stat.rs | 211 +++++++++++++++++ src/parser/typ.rs | 53 +++++ src/parser/utils.rs | 129 ++++++++++ src/tokeniser/mod.rs | 292 +++++++++++++++++++++++ src/tokeniser/tokentype.rs | 215 +++++++++++++++++ src/validator/mod.rs | 8 + src/validator/predefined.rs | 0 test.mcl | 86 +++++++ tests/parser/enums.mcl | 0 tests/parser/expr.mcl | 0 tests/parser/fn.mcl | 0 tests/parser/if.mcl | 0 tests/parser/loops.mcl | 0 tests/parser/structs.mcl | 0 31 files changed, 2217 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/bin/test/main.rs create mode 100644 src/cli.rs create mode 100644 src/common/loc.rs create mode 100644 src/common/mod.rs create mode 100644 src/lib.rs create mode 100644 src/logger.rs create mode 100644 src/main.rs create mode 100644 src/parser/ast/expr.rs create mode 100644 src/parser/ast/literal.rs create mode 100644 src/parser/ast/mod.rs create mode 100644 src/parser/ast/statement.rs create mode 100644 src/parser/ast/typ.rs create mode 100644 src/parser/expr.rs create mode 100644 src/parser/mod.rs create mode 100644 src/parser/stat.rs create mode 100644 src/parser/typ.rs create mode 100644 src/parser/utils.rs create mode 100644 src/tokeniser/mod.rs create mode 100644 src/tokeniser/tokentype.rs create mode 100644 src/validator/mod.rs create mode 100644 src/validator/predefined.rs create mode 100644 test.mcl create mode 100644 tests/parser/enums.mcl create mode 100644 tests/parser/expr.mcl create mode 100644 tests/parser/fn.mcl create mode 100644 tests/parser/if.mcl create mode 100644 tests/parser/loops.mcl create mode 100644 tests/parser/structs.mcl diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..5487249 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,276 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7" + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "clap" +version = "4.5.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "mclangc" +version = "0.1.0" 
+dependencies = [ + "anyhow", + "clap", + "lazy_static", + "parse_int", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "parse_int" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d695b79916a2c08bcff7be7647ab60d1402885265005a6658ffe6d763553c5a" +dependencies = [ + "num-traits", +] + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..30d4b11 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "mclangc" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0.94" +clap = { version = "4.5.23", features = ["derive"] } +lazy_static = "1.5.0" +parse_int = "0.6.0" diff --git a/src/bin/test/main.rs b/src/bin/test/main.rs new file mode 100644 index 0000000..f024d80 --- /dev/null +++ b/src/bin/test/main.rs @@ -0,0 +1,23 @@ + +/// Testing program for mclangc, taken inspiration from porth, which was made by tsoding :3 +#[derive(Debug, clap::Parser)] +#[command(version, about, long_about = None)] +struct CliArgs { + #[clap(subcommand)] + cmd: CliCmd +} + +#[derive(Debug, clap::Subcommand)] +pub enum CliCmd { + /// Run the tests + Run, + /// Run the tests and set the output as the expected output + Compile +} + + + +fn main() -> anyhow::Result<()> { + + Ok(()) +} diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..b28b04f --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,3 @@ + + + diff --git a/src/common/loc.rs b/src/common/loc.rs new file mode 100644 index 0000000..26bea1b --- /dev/null +++ b/src/common/loc.rs @@ -0,0 +1,60 @@ +use std::fmt::Display; + + + +#[derive(Debug, Clone, PartialEq, PartialOrd, Ord, Eq)] +pub struct Loc { + file: String, + line: usize, + col: usize, +} + +impl Loc { + pub fn new(s: impl ToString, line: usize, col: usize) -> Self { + Self { + file: s.to_string(), + line, col + } + } + fn file(&self) -> &String { + &self.file + } + fn line(&self) -> usize { + self.line + } + fn col(&self) -> usize { + self.col + } +} + +impl Display for Loc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}:{}", self.file, self.line, self.col) + } +} + +impl Default for Loc { + fn default() -> Self { + Self { + line: 1, + col: 1, + file: Default::default() + } + } +} + +pub trait LocIncr { + fn inc_line(&mut self); + fn inc_col(&mut self); +} + +impl LocIncr for Loc { + fn inc_line(&mut self) { + self.line += 1; + self.col = 1; + } + fn inc_col(&mut self) { + self.col += 1; + } + +} diff --git a/src/common/mod.rs b/src/common/mod.rs new file mode 100644 index 0000000..e833b61 --- /dev/null +++ b/src/common/mod.rs @@ -0,0 +1,2 @@ +pub mod loc; +pub use loc::Loc; diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..8003470 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,7 @@ +pub mod common; +pub mod tokeniser; +pub mod parser; +pub mod cli; +#[macro_use] +pub mod logger; +pub mod validator; diff --git a/src/logger.rs b/src/logger.rs new file mode 100644 index 0000000..bf8024c --- /dev/null +++ b/src/logger.rs @@ -0,0 +1,110 @@ +use crate::common::Loc; + + +#[repr(u8)] 
+#[derive(Debug, Default)] +pub enum Level { + Off = 0, + Error, + Warn, + #[default] + Info, + Help, + Debug +} + +const C_RESET: &'static str = "\x1B[0m"; +const C_ERROR: &'static str = "\x1B[1;31m"; +const C_WARN: &'static str = "\x1B[1;33m"; +const C_INFO: &'static str = "\x1B[1;32m"; +const C_DEBUG: &'static str = "\x1B[1;35m"; +const C_HELP: &'static str = "\x1B[1;36m"; + +pub fn _log(level: Level, str: &str) { + match level { + Level::Off => return, + Level::Error => println!("{C_ERROR}error{C_RESET}: {str}"), + Level::Warn => println!("{C_WARN}warn{C_RESET}: {str}"), + Level::Info => println!("{C_INFO}info{C_RESET}: {str}"), + Level::Help => println!("{C_HELP}help{C_RESET}: {str}"), + Level::Debug => println!("{C_DEBUG}debug{C_RESET}: {str}"), + } +} + +pub fn _log_with_loc(loc: &Loc, level: Level, str: &str) { + match level { + Level::Off => return, + Level::Error => println!("{loc}: {C_ERROR}error{C_RESET}: {str}"), + Level::Warn => println!("{loc}: {C_WARN}warn{C_RESET}: {str}"), + Level::Info => println!("{loc}: {C_INFO}info{C_RESET}: {str}"), + Level::Help => println!("{loc}: {C_HELP}help{C_RESET}: {str}"), + Level::Debug => println!("{loc}: {C_DEBUG}debug{C_RESET}: {str}"), + } +} + +#[macro_use] +pub mod log { + #[macro_export] + macro_rules! error { + ($($arg:tt)*) => { + crate::logger::_log(crate::logger::Level::Error, &format!($($arg)*)) + }; + } + #[macro_export] + macro_rules! warn { + ($($arg:tt)*) => { + crate::logger::_log(crate::logger::Level::Warn, &format!($($arg)*)) + }; + } + #[macro_export] + macro_rules! info { + ($($arg:tt)*) => { + crate::logger::_log(crate::logger::Level::Info, &format!($($arg)*)) + }; + } + #[macro_export] + macro_rules! help { + ($($arg:tt)*) => { + crate::logger::_log(crate::logger::Level::Help, &format!($($arg)*)) + }; + } + #[macro_export] + macro_rules! debug { + ($($arg:tt)*) => { + crate::logger::_log(crate::logger::Level::Debug, &format!($($arg)*)) + }; + } + + + #[macro_export] + macro_rules! lerror { + ($loc:expr, $($arg:tt)*) => { + crate::logger::_log_with_loc($loc, crate::logger::Level::Error, &format!($($arg)*)) + }; + } + #[macro_export] + macro_rules! lwarn { + ($loc:expr, $($arg:tt)*) => { + crate::logger::_log_with_loc($loc, crate::logger::Level::Warn, &format!($($arg)*)) + }; + } + #[macro_export] + macro_rules! linfo { + ($loc:expr, $($arg:tt)*) => { + crate::logger::_log_with_loc($loc, crate::logger::Level::Info, &format!($($arg)*)) + }; + } + #[macro_export] + macro_rules! lhelp { + ($loc:expr, $($arg:tt)*) => { + crate::logger::_log_with_loc($loc, crate::logger::Level::Help, &format!($($arg)*)) + }; + } + #[macro_export] + macro_rules! 
ldebug {
+    ($loc:expr, $($arg:tt)*) => {
+        crate::logger::_log_with_loc($loc, crate::logger::Level::Debug, &format!($($arg)*))
+    };
+}
+}
+
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..79ff7cd
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,11 @@
+
+
+
+fn main() -> anyhow::Result<()> {
+    let data = std::fs::read_to_string("test.mcl").unwrap();
+
+    let tokens = mclangc::tokeniser::tokenise(&data)?;
+    let prog = mclangc::parser::parse_program(tokens)?;
+    mclangc::validator::validate_code(&prog);
+    Ok(())
+}
diff --git a/src/parser/ast/expr.rs b/src/parser/ast/expr.rs
new file mode 100644
index 0000000..7ce498f
--- /dev/null
+++ b/src/parser/ast/expr.rs
@@ -0,0 +1,103 @@
+use std::collections::HashMap;
+
+use crate::tokeniser::tokentype::*;
+
+use super::{typ::Type, Ast};
+
+#[derive(Debug, Clone)]
+pub enum Expr {
+    // Comment(Comment),
+    Group(Box<Expr>),
+    UnOp {
+        typ: Punctuation,
+        right: Box<Expr>,
+    },
+    BinOp {
+        typ: Punctuation,
+        left: Box<Expr>,
+        right: Box<Expr>,
+    },
+    Literal(super::literal::Literal),
+    ArrayIndex {
+        name: Box<Expr>,
+        index: Box<Expr>,
+    },
+    Path(Path),
+    Call {
+        path: Box<Expr>,
+        params: CallParams, // Expr ~ (, Expr)*
+    },
+    //MethodCall {
+    //    var_name: Box<Expr>,
+    //    method_name: Ident,
+    //    params: CallParams,
+    //},
+
+    /// The left side only exists on the /.|->/ chain
+    FieldAccess {
+        left: Box<Option<Expr>>,
+        right: Box<Expr>,
+    },
+    PtrFieldAccess {
+        left: Box<Option<Expr>>,
+        right: Box<Expr>,
+    },
+    ForLoop {
+        init: Box<Ast>,
+        test: Box<Expr>,
+        on_loop: Box<Expr>,
+        body: Block,
+    },
+    WhileLoop {
+        test: Box<Expr>,
+        body: Block,
+    },
+    InfLoop {
+        body: Block,
+    },
+    If(IfExpr),
+    Struct {
+        path: Path,
+        fields: HashMap<Ident, Expr>,
+    },
+    Return(Box<Option<Expr>>),
+    Break,
+    Continue,
+    Cast {
+        left: Box<Expr>,
+        right: Box<Type>
+    },
+}
+
+impl Expr {
+    pub fn unwrap_path(&self) -> Path {
+        let Expr::Path(p) = self else {panic!("Unwrapping")};
+        p.clone()
+    }
+}
+
+
+
+#[derive(Debug, Clone)]
+pub struct CallParams(pub Vec<Expr>);
+
+#[derive(Debug, Clone)]
+pub struct Block(pub Vec<Ast>);
+
+#[derive(Debug, Clone)]
+pub struct Path(pub Vec<Ident>);
+
+
+#[derive(Debug, Clone)]
+pub struct IfExpr {
+    pub test: Box<Expr>,
+    pub body: Block,
+    pub else_if: Option<IfBranchExpr>
+}
+
+#[derive(Debug, Clone)]
+pub enum IfBranchExpr {
+    ElseIf(Box<IfExpr>),
+    Else(Block)
+}
+
diff --git a/src/parser/ast/literal.rs b/src/parser/ast/literal.rs
new file mode 100644
index 0000000..bfc8dff
--- /dev/null
+++ b/src/parser/ast/literal.rs
@@ -0,0 +1,22 @@
+use std::collections::HashMap;
+
+use crate::tokeniser::tokentype::*;
+
+use super::{expr::Expr, typ::Type, Ast};
+
+#[derive(Debug, Clone)]
+pub enum Literal {
+    Number(Number),
+    Ident(Ident),
+    String(TString),
+    Char(Char),
+    Array(Vec<Expr>),
+    ArrayRepeat {
+        typ: Box<Type>,
+        count: Box<Expr>,
+    },
+    Struct {
+        name: Ident,
+        fields: HashMap<Ident, Expr>
+    },
+}
diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs
new file mode 100644
index 0000000..312cd4d
--- /dev/null
+++ b/src/parser/ast/mod.rs
@@ -0,0 +1,28 @@
+use std::collections::HashMap;
+
+use typ::Type;
+
+pub use crate::tokeniser::tokentype::*;
+
+pub mod expr;
+pub mod literal;
+pub mod statement;
+pub mod typ;
+
+#[derive(Debug, Clone)]
+pub struct Program {
+    pub ast: expr::Block,
+    pub structs: HashMap<Ident, Vec<(Ident, Type)>>,
+    pub enums: HashMap<Ident, Vec<Ident>>,
+    pub types: HashMap<Ident, Type>,
+    pub functions: HashMap<Ident, (Vec<(Ident, Type)>, Type)>,
+    pub member_functions: HashMap<Ident, HashMap<Ident, (Vec<(Ident, Type)>, Type)>>,
+}
+
+#[derive(Debug, Clone)]
+pub enum Ast {
+    Expr(expr::Expr),
+    Statement(statement::Statement),
+}
+
+
diff --git a/src/parser/ast/statement.rs b/src/parser/ast/statement.rs
new file mode 100644
index 0000000..04c584e
--- /dev/null
+++ b/src/parser/ast/statement.rs
@@ -0,0 +1,44 @@
+use
std::collections::HashMap; + +use super::{expr::{Block, Expr}, typ::Type, Ident, TString}; + + +#[derive(Debug, Clone)] +pub enum Statement { + Fn { + struct_name: Option, + name: Ident, + params: Vec<(Ident, Type)>, + ret_type: Option, + qual_const: bool, + qual_extern: Option, // abi + body: Option, // If None then its a type declaration + }, + TypeAlias { + name: Ident, + typ: Type, + }, + Struct { + name: Ident, + fields: Vec<(Ident, Type)>, + }, + Enum { + name: Ident, + fields: Vec, + }, + ConstVar { + name: Ident, + typ: Type, + val: Expr + }, + StaticVar { + name: Ident, + typ: Type, + val: Expr, + }, + Let { + name: Ident, + typ: Option, + val: Option, + }, +} diff --git a/src/parser/ast/typ.rs b/src/parser/ast/typ.rs new file mode 100644 index 0000000..a78206b --- /dev/null +++ b/src/parser/ast/typ.rs @@ -0,0 +1,17 @@ +use super::{expr::Expr, Ident, Number}; + +#[derive(Debug, Clone)] +pub enum Type { + Ref { + inner: Box, + mutable: bool, + }, + Array { + inner: Box, + }, + ArrayRepeat { + inner: Box, + count: Expr, + }, + Owned(Ident), +} diff --git a/src/parser/expr.rs b/src/parser/expr.rs new file mode 100644 index 0000000..226054f --- /dev/null +++ b/src/parser/expr.rs @@ -0,0 +1,454 @@ +use std::collections::HashMap; + +use anyhow::{bail, Result}; + +use crate::{debug, lerror, parser::{typ::parse_type, Punctuation}, tokeniser::Token}; + +use super::{ast::{expr::{Block, CallParams, Expr, IfBranchExpr, IfExpr, Path}, literal::Literal, TokenType}, parse_item, utils, Delimiter, Keyword}; + +const BINOP_LIST: &[TokenType] = &[ + TokenType::Punct(Punctuation::Plus), + TokenType::Punct(Punctuation::Minus), + TokenType::Punct(Punctuation::Div), + TokenType::Punct(Punctuation::Star), + TokenType::Punct(Punctuation::Mod), + TokenType::Punct(Punctuation::Shl), + TokenType::Punct(Punctuation::Shr), + TokenType::Punct(Punctuation::AndAnd), + TokenType::Punct(Punctuation::OrOr), + TokenType::Punct(Punctuation::Ampersand), + TokenType::Punct(Punctuation::Or), + TokenType::Punct(Punctuation::Xor), + TokenType::Punct(Punctuation::AddEq), + TokenType::Punct(Punctuation::SubEq), + TokenType::Punct(Punctuation::DivEq), + TokenType::Punct(Punctuation::MulEq), + TokenType::Punct(Punctuation::ModEq), + TokenType::Punct(Punctuation::ShlEq), + TokenType::Punct(Punctuation::ShrEq), + TokenType::Punct(Punctuation::AndEq), + TokenType::Punct(Punctuation::OrEq), + TokenType::Punct(Punctuation::XorEq), + TokenType::Punct(Punctuation::Eq), + TokenType::Punct(Punctuation::EqEq), + TokenType::Punct(Punctuation::Lt), + TokenType::Punct(Punctuation::Gt), + TokenType::Punct(Punctuation::Le), + TokenType::Punct(Punctuation::Ge), +]; + +pub fn parse_expr(tokens: &mut Vec, precedence: usize, consume_semi: bool) -> Result> { + let res = if let Some(_) = utils::check(tokens, TokenType::Delim(Delimiter::ParenL)) { + Some(parse_group(tokens)?) + } else + if let Some(_) = utils::check(tokens, TokenType::ident("")) { + let p = parse_path(tokens)?; + if let Some(_) = utils::check(tokens, TokenType::Delim(Delimiter::CurlyL)) { + Some(parse_struct_literal(tokens, p.unwrap_path())?) + } else { + Some(p) + } + } else + if let Some(_) = utils::check_from_many(tokens, &[ + TokenType::Punct(Punctuation::Not), + TokenType::Punct(Punctuation::Plus), + TokenType::Punct(Punctuation::Minus), + TokenType::Punct(Punctuation::Ampersand), + TokenType::Punct(Punctuation::Star), + ]) { + Some(parse_unop(tokens)?) 
+ } else + if let Some(_) = utils::check_from_many(tokens, &[ + TokenType::string("", false), + TokenType::number(0, 0, false), + TokenType::char('\0'), + TokenType::Delim(Delimiter::SquareL), + ]) { + Some(parse_literal(tokens)?) + } else if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::While)) { + return Ok(Some(parse_while_loop(tokens)?)); + } else if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::For)) { + return Ok(Some(parse_for_loop(tokens)?)); + } else if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Loop)) { + return Ok(Some(parse_inf_loop(tokens)?)); + } else if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Return)) { + return Ok(Some(parse_return(tokens)?)); + } else if let Some(_) = utils::check_consume(tokens, TokenType::Keyword(Keyword::Break)) { + return Ok(Some(Expr::Break)); + } else if let Some(_) = utils::check_consume(tokens, TokenType::Keyword(Keyword::Continue)) { + return Ok(Some(Expr::Continue)); + } else if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::If)) { + return Ok(Some(Expr::If(parse_if(tokens)?))); + } else { + None + }; + + + if let Some(res) = res { + // check for binop + let res = match res { + _ if utils::check(tokens, TokenType::Punct(Punctuation::Fieldaccess)).is_some() => { + parse_field_access(tokens, res)? + } + _ if utils::check(tokens, TokenType::Punct(Punctuation::Arrow)).is_some() => { + parse_ptr_field_access(tokens, res)? + } + _ if utils::check(tokens, TokenType::Delim(Delimiter::ParenL)).is_some() => { + parse_fn_call(tokens, res)? + } + _ if utils::check(tokens, TokenType::Keyword(Keyword::As)).is_some() => { + parse_cast(tokens, res)? + } + _ if utils::check(tokens, TokenType::Delim(Delimiter::SquareL)).is_some() => { + parse_array_index(tokens, res)? + } + _ => res + }; + + if let Some(_) = utils::check_from_many(tokens, BINOP_LIST) { + return Ok(Some(parse_binop(tokens, res, precedence)?)); + } else { + return Ok(Some(res)); + } + + } + if consume_semi { + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Semi), "Expected ; at the end of the expression")?; + } + Ok(res) +} + +fn parse_return(tokens: &mut Vec) -> Result { + _ = utils::check_consume(tokens, TokenType::Keyword(Keyword::Return)); + let item = parse_expr(tokens, 0, true)?; + Ok(Expr::Return(Box::new(item))) +} + +fn parse_cast(tokens: &mut Vec, left: Expr) -> Result { + _ = utils::check_consume_or_err(tokens, TokenType::Keyword(Keyword::As), "")?; + let typ = parse_type(tokens)?; + Ok(Expr::Cast { + left: Box::new(left), + right: Box::new(typ) + }) +} +fn parse_if(tokens: &mut Vec) -> Result { + let loc = utils::check_consume_or_err(tokens, TokenType::Keyword(Keyword::If), "")?; + let Some(test) = parse_expr(tokens, 0, false)? 
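+    // No condition expression after `if`; report the error at the `if` keyword itself.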
else {
+        lerror!(loc.loc(), "Expected test for if statement, got nothing");
+        bail!("")
+    };
+    let block = parse_block(tokens)?;
+    if let Some(_) = utils::check_consume(tokens, TokenType::Keyword(Keyword::Else)) {
+        if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::If)) {
+            let branch = IfBranchExpr::ElseIf(Box::new(parse_if(tokens)?));
+            Ok(IfExpr {
+                test: Box::new(test),
+                body: block,
+                else_if: Some(branch)
+            })
+        } else {
+            let branch = IfBranchExpr::Else(parse_block(tokens)?);
+            Ok(IfExpr {
+                test: Box::new(test),
+                body: block,
+                else_if: Some(branch)
+            })
+        }
+    } else {
+        Ok(IfExpr {
+            test: Box::new(test),
+            body: block,
+            else_if: None
+        })
+    }
+}
+fn parse_while_loop(tokens: &mut Vec<Token>) -> Result<Expr> {
+    let loc = utils::check_consume_or_err(tokens, TokenType::Keyword(Keyword::While), "")?;
+    let Some(test) = parse_expr(tokens, 0, false)? else {
+        lerror!(loc.loc(), "Expected test comparison for while loop, got nothing");
+        bail!("")
+    };
+    let block = parse_block(tokens)?;
+    Ok(Expr::WhileLoop {
+        test: Box::new(test),
+        body: block
+    })
+}
+fn parse_for_loop(tokens: &mut Vec<Token>) -> Result<Expr> {
+    let loc = utils::check_consume_or_err(tokens, TokenType::Keyword(Keyword::For), "")?;
+    let Some(pre) = parse_item(tokens)? else {
+        lerror!(loc.loc(), "Expected init statement for a for loop, got nothing");
+        bail!("")
+    };
+    _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Semi), "");
+    let Some(test) = parse_expr(tokens, 0, false)? else {
+        lerror!(loc.loc(), "Expected test comparison for a for loop, got nothing");
+        bail!("")
+    };
+    _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Semi), "");
+    let Some(post) = parse_expr(tokens, 0, false)? else {
+        lerror!(loc.loc(), "Expected post expression (usually an index increment) for a for loop, got nothing");
+        bail!("")
+    };
+    let block = parse_block(tokens)?;
+
+    Ok(Expr::ForLoop {
+        init: Box::new(pre),
+        test: Box::new(test),
+        on_loop: Box::new(post),
+        body: block
+    })
+}
+fn parse_inf_loop(tokens: &mut Vec<Token>) -> Result<Expr> {
+    _ = utils::check_consume_or_err(tokens, TokenType::Keyword(Keyword::Loop), "");
+    let block = parse_block(tokens)?;
+    Ok(Expr::InfLoop { body: block })
+}
+fn parse_fn_call(tokens: &mut Vec<Token>, left: Expr) -> Result<Expr> {
+    _ = utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::ParenL), "");
+    let mut params = Vec::new();
+
+    while !tokens.is_empty() {
+        if let Some(_) = utils::check(tokens, TokenType::Delim(Delimiter::ParenR)) {
+            break;
+        }
+        let Some(param) = parse_expr(tokens, 0, false)? else {break};
+        params.push(param);
+        if let None = utils::check_consume(tokens, TokenType::Punct(Punctuation::Comma)) {
+            if let None = utils::check(tokens, TokenType::Delim(Delimiter::ParenR)) {
+                lerror!(&utils::get_last_loc(), "Expected ',' or ')' but didn't find either");
+                bail!("")
+            }
+        }
+    }
+    _ = utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::ParenR), "");
+    Ok(Expr::Call { path: Box::new(left), params: CallParams(params) })
+}
+fn parse_array_index(tokens: &mut Vec<Token>, left: Expr) -> Result<Expr> {
+    let loc = utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::SquareL), "")?;
+    let Some(idx) = parse_expr(tokens, 0, false)?
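+    // Missing index expression between `[` and `]`; report the error at the opening bracket.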
else { + lerror!(loc.loc(), "Expected index for in array index but found nothing."); + bail!("") + }; + _ = utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::SquareR), ""); + Ok(Expr::ArrayIndex { + name: Box::new(left), + index: Box::new(idx) + }) +} + +fn parse_field_access(tokens: &mut Vec, left: Expr) -> Result { + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Fieldaccess), "unreachable")?; + + let right = if let Some(_) = utils::check_2_last(tokens, TokenType::Punct(Punctuation::Arrow)) { + let right = parse_path(tokens)?; + parse_ptr_field_access(tokens, right)? + } else if let Some(_) = utils::check_2_last(tokens, TokenType::Punct(Punctuation::Fieldaccess)) { + let right = parse_path(tokens)?; + parse_field_access(tokens, right)? + } else { + parse_path(tokens)? + }; + Ok(Expr::FieldAccess { + left: Box::new(Some(left)), + right: Box::new(right) + }) +} + +fn parse_ptr_field_access(tokens: &mut Vec, left: Expr) -> Result { + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Arrow), "unreachable")?; + let right = if let Some(_) = utils::check_2_last(tokens, TokenType::Punct(Punctuation::Arrow)) { + let right = parse_path(tokens)?; + parse_ptr_field_access(tokens, right)? + } else if let Some(_) = utils::check_2_last(tokens, TokenType::Punct(Punctuation::Fieldaccess)) { + let right = parse_path(tokens)?; + parse_field_access(tokens, right)? + } else { + parse_path(tokens)? + }; + Ok(Expr::PtrFieldAccess { + left: Box::new(Some(left)), + right: Box::new(right) + }) +} + +fn parse_literal(tokens: &mut Vec) -> Result { + if let Some(tkn) = utils::check_consume(tokens, TokenType::string("", false)) { + let TokenType::String(str) = tkn.tt() else {unreachable!()}; + return Ok(Expr::Literal(Literal::String(str.clone()))); + } else + if let Some(tkn) = utils::check_consume(tokens, TokenType::number(0, 0, false)) { + let TokenType::Number(val) = tkn.tt() else {unreachable!()}; + return Ok(Expr::Literal(Literal::Number(val.clone()))); + } else + if let Some(tkn) = utils::check_consume(tokens, TokenType::char('\0')) { + let TokenType::Char(val) = tkn.tt() else {unreachable!()}; + return Ok(Expr::Literal(Literal::Char(val.clone()))); + } else + if let Some(start) = utils::check_consume(tokens, TokenType::Delim(Delimiter::SquareL)) { + if let Some(_) = utils::check_consume(tokens, TokenType::Delim(Delimiter::SquareR)) { + return Ok(Expr::Literal(Literal::Array(Vec::new()))); + } + if *tokens[tokens.len()-2].tt() == TokenType::Punct(Punctuation::Comma) { + let first = parse_expr(tokens, 0, false)?; + let Some(first) = first else { unreachable!() }; + + let mut values = Vec::new(); + values.push(first); + while !tokens.is_empty() { + let Some(val) = parse_expr(tokens, 0, false)? 
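+            // No further element expression could be parsed, so stop collecting array values.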
else{break}; + + values.push(val); + if let None = utils::check_consume(tokens, TokenType::Punct(Punctuation::Comma)) { + break; + } + } + utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::SquareR), "")?; + return Ok(Expr::Literal(Literal::Array(values))); + } else if *tokens[tokens.len()-2].tt() == TokenType::Punct(Punctuation::Semi) { + let typ = parse_type(tokens)?; + let count = parse_expr(tokens, 0, false)?.unwrap(); + utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::SquareR), "")?; + return Ok(Expr::Literal(Literal::ArrayRepeat { + typ: Box::new(typ), + count: Box::new(count) + })); + } else { + if let Some(curr) = tokens.last() { + lerror!(start.loc(), "Expected a , or ; as a separator in a literal array (normal, or repeating, respectively), but found {}", curr.tt()); + } else { + lerror!(start.loc(), "Expected a , or ; as a separator in a literal array (normal, or repeating, respectively), but found nothing"); + } + bail!("") + } + } + unreachable!() +} + +fn parse_struct_literal(tokens: &mut Vec, name: Path) -> Result { + _ = utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::CurlyL), "")?; + let mut fields = HashMap::new(); + while !tokens.is_empty() { + if let Some(_) = utils::check_consume(tokens, TokenType::Delim(Delimiter::CurlyR)) { + break; + } + + let name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?; + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Colon), "")?; + let typ = parse_expr(tokens, 0, false)?.unwrap(); + fields.insert(name.tt().unwrap_ident(), typ); + if let None = utils::check_consume(tokens, TokenType::Punct(Punctuation::Comma)) { + utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::CurlyR), "")?; + break; + } + } + Ok(Expr::Struct { path: name, fields }) +} + +fn parse_group(tokens: &mut Vec) -> Result { + let loc = utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::ParenL), "")?; + let Some(expr) = parse_expr(tokens, 0, false)? else { + lerror!(loc.loc(), "Expected expr found nothing"); + bail!("") + }; + + utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::ParenR), "")?; + Ok(Expr::Group(Box::new(expr))) +} + +fn parse_path(tokens: &mut Vec) -> Result { + let mut buf = Vec::new(); + let part = utils::check_consume(tokens, TokenType::ident("")).unwrap(); + + + buf.push(part.tt().unwrap_ident()); + while let Some(_) = utils::check_consume(tokens, TokenType::Punct(Punctuation::Pathaccess)) { + let Some(part) = utils::check_consume(tokens, TokenType::ident("")) else { + break; + }; + buf.push(part.tt().unwrap_ident()); + } + + Ok(Expr::Path(Path(buf))) +} + +fn parse_unop(tokens: &mut Vec) -> Result { + let typ = utils::check_consume_or_err_from_many(tokens, &[ + TokenType::Punct(Punctuation::Not), + TokenType::Punct(Punctuation::Plus), + TokenType::Punct(Punctuation::Minus), + TokenType::Punct(Punctuation::Ampersand), + TokenType::Punct(Punctuation::Star), + ], "")?; + let loc = typ.loc().clone(); + let TokenType::Punct(typ) = typ.tt().clone() else {unreachable!()}; + + let Some(right) = parse_expr(tokens, 5, false)? 
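+    // Parse the operand at an elevated precedence so lower-precedence binary operators are not absorbed into the unary expression.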
else { + lerror!(&loc, "Expected expression after unary token, found nothing"); + bail!("") + }; + Ok(Expr::UnOp { + typ, + right: Box::new(right) + }) +} + +fn parse_binop(tokens: &mut Vec, mut lhs: Expr, precedence: usize) -> Result { + // TODO: https://en.wikipedia.org/wiki/Operator-precedence_parser#Pseudocode + + loop { + let op = match tokens.last() { + Some(op) if BINOP_LIST.contains(&op.tt()) => { + let TokenType::Punct(op) = op.tt() else {unreachable!()}; + op.clone() + } + Some(op) if [ + TokenType::Delim(Delimiter::ParenR), + TokenType::Punct(Punctuation::Semi) + ].contains(&op.tt()) => { + break + } + Some(op) if matches!(&op.tt(), TokenType::Ident(_)) => { + lerror!(op.loc(), "Unexpected identifier, did you forget a semicolon? ';'"); + bail!(""); + } + Some(_) | + None => break, + }; + debug!("OP: {op:?}"); + let (lp, rp) = op.precedence().unwrap(); + if lp < precedence { + break + } + + _ = tokens.pop(); + let Some(rhs) = parse_expr(tokens, rp, false)? else {break;}; + lhs = Expr::BinOp { + typ: op, + left: Box::new(lhs), + right: Box::new(rhs) + }; + } + + + Ok(lhs) + +} + +pub fn parse_block(tokens: &mut Vec) -> Result { + utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::CurlyL), "")?; + let mut items = Vec::new(); + while !tokens.is_empty() { + if let Some(item) = parse_item(tokens)? { + items.push(item); + } else { + break; + } + if let Some(_) = utils::check(tokens, TokenType::Delim(Delimiter::CurlyR)) { + break; + } + } + utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::CurlyR), "")?; + Ok(Block(items)) +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..8fe9755 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,50 @@ +use std::collections::HashMap; + +use ast::{expr::Block, Ast, Program}; + +use crate::tokeniser::{Token, tokentype::*}; + +pub mod ast; +mod expr; +mod stat; +mod utils; +mod typ; + +type Result = anyhow::Result; + +pub fn parse_program(mut tokens: Vec) -> Result { + let mut prog_body = Vec::new(); + + while !tokens.is_empty() { + if let Some(item) = parse_item(&mut tokens)? { + prog_body.push(item); + } else { + break + } + } + + + Ok(Program { + ast: Block(prog_body), + enums: HashMap::new(), + functions: HashMap::new(), + member_functions: HashMap::new(), + types: HashMap::new(), + structs: HashMap::new() + }) +} + +fn parse_item(tokens: &mut Vec) -> Result> { + if let Some(stat) = stat::parse_statement(tokens)? { + return Ok(Some(Ast::Statement(stat))); + } + if let Some(expr) = expr::parse_expr(tokens, 0, true)? 
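+    // Not a statement, so fall back to parsing an expression (its trailing semicolon is consumed here).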
{ + return Ok(Some(Ast::Expr(expr))); + } + Ok(None) +} + + + + + diff --git a/src/parser/stat.rs b/src/parser/stat.rs new file mode 100644 index 0000000..7cde2b4 --- /dev/null +++ b/src/parser/stat.rs @@ -0,0 +1,211 @@ +use anyhow::bail; + +use crate::lerror; +use crate::parser::ast::TokenType; +use crate::parser::expr::parse_expr; +use crate::parser::{Delimiter, Ident, Keyword, Punctuation}; +use crate::tokeniser::Token; +use super::ast::typ::Type; +use super::expr::parse_block; +use super::typ::parse_type; +use super::utils; +use super::ast::statement::Statement; + +type Result = anyhow::Result; + +pub fn parse_statement(tokens: &mut Vec) -> Result> { + if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Fn)) { + Ok(Some(parse_fn(tokens)?)) + } else + if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Type)) { + Ok(Some(parse_type_alias(tokens)?)) + } else + if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Const)) { + Ok(Some(parse_constant(tokens)?)) + } else + if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Static)) { + Ok(Some(parse_static(tokens)?)) + } else + if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Struct)) { + Ok(Some(parse_struct(tokens)?)) + } else + if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Enum)) { + Ok(Some(parse_enum(tokens)?)) + } else + if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Let)) { + Ok(Some(parse_let(tokens)?)) + } else { + Ok(None) + } +} + +fn parse_enum(tokens: &mut Vec) -> Result { + _ = utils::check_consume(tokens, TokenType::Keyword(Keyword::Enum)); + let name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?.tt().unwrap_ident(); + _ = utils::check_consume(tokens, TokenType::Delim(Delimiter::CurlyL)); + let mut fields = Vec::new(); + while !tokens.is_empty() { + if let Some(_) = utils::check_consume(tokens, TokenType::Delim(Delimiter::CurlyR)) { + break; + } + let field_name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?; + let loc = field_name.loc().clone(); + let field_name = field_name.tt().unwrap_ident(); + if let None = utils::check_consume(tokens, TokenType::Punct(Punctuation::Comma)) { + if let None = utils::check(tokens, TokenType::Delim(Delimiter::CurlyR)) { + lerror!(&loc, "Expected comma after struct field"); + bail!("") + } + } + fields.push(field_name); + } + + Ok(Statement::Enum { name, fields }) +} + +fn parse_struct(tokens: &mut Vec) -> Result { + _ = utils::check_consume(tokens, TokenType::Keyword(Keyword::Struct)); + let name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?.tt().unwrap_ident(); + _ = utils::check_consume(tokens, TokenType::Delim(Delimiter::CurlyL)); + let mut fields = Vec::new(); + while !tokens.is_empty() { + if let Some(_) = utils::check_consume(tokens, TokenType::Delim(Delimiter::CurlyR)) { + break; + } + let field_name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?; + let loc = field_name.loc().clone(); + let field_name = field_name.tt().unwrap_ident(); + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Colon), "")?; + let typ = parse_type(tokens)?; + if let None = utils::check_consume(tokens, TokenType::Punct(Punctuation::Comma)) { + if let None = utils::check(tokens, TokenType::Delim(Delimiter::CurlyR)) { + lerror!(&loc, "Expected comma after struct field"); + bail!("") + } + } + fields.push((field_name, typ)); + } + + Ok(Statement::Struct { name, fields }) +} + +fn parse_static(tokens: &mut Vec) -> 
Result { + _ = utils::check_consume(tokens, TokenType::Keyword(Keyword::Static)); + + let name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?.tt().unwrap_ident(); + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Colon), "")?; + let typ = parse_type(tokens)?; + let eq = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Eq), "")?; + let Some(val) = parse_expr(tokens, 0, false)? else { + lerror!(eq.loc(), "Expected expression found nothing"); + bail!("") + }; + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Semi), "")?; + Ok(Statement::StaticVar { name, typ, val }) +} + +fn parse_let(tokens: &mut Vec) -> Result { + _ = utils::check_consume(tokens, TokenType::Keyword(Keyword::Let)); + let name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?.tt().unwrap_ident(); + let mut typ = None; + let mut val = None; + if let Some(_) = utils::check_consume(tokens, TokenType::Punct(Punctuation::Colon)) { + typ = Some(parse_type(tokens)?); + } + if let Some(eq) = utils::check_consume(tokens, TokenType::Punct(Punctuation::Eq)) { + let Some(_val) = parse_expr(tokens, 0, false)? else { + lerror!(eq.loc(), "Expected expression found nothing"); + bail!("") + }; + val = Some(_val); + } + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Semi), "")?; + Ok(Statement::Let { name, typ, val }) +} +fn parse_constant(tokens: &mut Vec) -> Result { + _ = utils::check_consume(tokens, TokenType::Keyword(Keyword::Const)); + + if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Fn)) { + unimplemented!() + } + let name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?.tt().unwrap_ident(); + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Colon), "")?; + let typ = parse_type(tokens)?; + let eq = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Eq), "")?; + let Some(val) = parse_expr(tokens, 0, false)? 
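+    // A `const` item must have an initialiser; report the missing value at the `=` sign.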
else { + lerror!(eq.loc(), "Expected expression found nothing"); + bail!("") + }; + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Semi), "")?; + Ok(Statement::ConstVar { name, typ, val }) +} + +fn parse_type_alias(tokens: &mut Vec) -> Result { + _ = utils::check_consume(tokens, TokenType::Keyword(Keyword::Type)); + let name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?.tt().unwrap_ident(); + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Eq), "")?; + let typ = parse_type(tokens)?; + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Semi), "")?; + + Ok(Statement::TypeAlias { name, typ }) +} + +fn parse_fn(tokens: &mut Vec) -> Result { + // Just remove the kw since we checked it before + _ = utils::check_consume(tokens, TokenType::Keyword(Keyword::Fn)); + + let mut struct_name = None; + let mut name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?.tt().unwrap_ident(); + // Check if this is a struct method + if let Some(_) = utils::check_consume(tokens, TokenType::Punct(Punctuation::Fieldaccess)) { + struct_name = Some(name); + name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?.tt().unwrap_ident(); + } + let params = parse_fn_params(tokens)?; + + // Check for return type cause it optional + let mut ret_type = None; + if let Some(_) = utils::check_consume(tokens, TokenType::Punct(Punctuation::Arrow)) { + ret_type = Some(parse_type(tokens)?); + } + let body; + if let Some(_) = utils::check(tokens, TokenType::Delim(Delimiter::CurlyL)) { + body = Some(parse_block(tokens)?); + } else { + // Check if its just a declaration + _ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Semi), "")?; + body = None; + } + Ok(Statement::Fn { + struct_name, + name, + params, + ret_type, + qual_const: false, + qual_extern: None, + body, + }) +} + + + +fn parse_fn_params(tokens: &mut Vec) -> Result> { + let mut args = Vec::new(); + utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::ParenL), "")?; + while !tokens.is_empty() { + let name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?; + utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Colon), "")?; + //dbg!(&name); + let typ = parse_type(tokens)?; + args.push((name.tt().unwrap_ident(), typ)); + + if let None = utils::check_consume(tokens, TokenType::Punct(Punctuation::Comma)) { + break; + } + } + + utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::ParenR), "")?; + Ok(args) +} + + diff --git a/src/parser/typ.rs b/src/parser/typ.rs new file mode 100644 index 0000000..0ea0908 --- /dev/null +++ b/src/parser/typ.rs @@ -0,0 +1,53 @@ +use anyhow::Result; + +use crate::tokeniser::Token; + +use super::{ast::{typ::Type, TokenType}, expr::parse_expr, utils, Keyword, Punctuation}; + +pub fn parse_type(tokens: &mut Vec) -> Result { + let mut ref_cnt = Vec::new(); + while let Some(tok) = utils::check_consume(tokens, TokenType::Punct(Punctuation::Ampersand)) { + if let Some(tok) = utils::check_consume(tokens, TokenType::Keyword(Keyword::Mut)) { + ref_cnt.push(tok.clone()); + } else { + ref_cnt.push(tok.clone()); + } + } + + let mut typ; + if let Some(_) = utils::check(tokens, TokenType::Delim(super::Delimiter::SquareL)) { + let itm_typ = parse_type(tokens)?; + if let Some(_) = utils::check_consume(tokens, TokenType::Punct(Punctuation::Semi)) { + let count = parse_expr(tokens, 0, false)?.unwrap(); + typ = Type::ArrayRepeat { + inner: Box::new(itm_typ), + count + } + } else { + typ = 
Type::Array { + inner: Box::new(itm_typ), + } + } + } else { + let ident = utils::check_consume_or_err(tokens, TokenType::ident(""), "a")?; + typ = Type::Owned(ident.tt().unwrap_ident()); + } + while let Some(reft) = ref_cnt.pop() { + match reft.tt() { + TokenType::Keyword(Keyword::Mut) => { + typ = Type::Ref { + inner: Box::new(typ), + mutable: true + } + }, + TokenType::Punct(Punctuation::Ampersand) => { + typ = Type::Ref { + inner: Box::new(typ), + mutable: false + } + } + _ => unreachable!() + } + } + Ok(typ) +} diff --git a/src/parser/utils.rs b/src/parser/utils.rs new file mode 100644 index 0000000..1f3e58f --- /dev/null +++ b/src/parser/utils.rs @@ -0,0 +1,129 @@ +use std::sync::{Arc, Mutex}; + +use lazy_static::lazy_static; + +use crate::{common::Loc, debug, lerror, tokeniser::Token}; + +use super::ast::TokenType; + + +lazy_static!( + static ref LAST_LOC: Arc> = Arc::new(Mutex::new(Loc::default())); +); + +pub fn check(tokens: &Vec, tt: TokenType) -> Option<&Token> { + if let Some(tkn) = tokens.last() { + if tkn.tt() == &tt || + // ignore internal values if searching for these + matches!((tkn.tt(), &tt), (TokenType::Ident(_), TokenType::Ident(_))) || + matches!((tkn.tt(), &tt), (TokenType::String(_), TokenType::String(_))) || + matches!((tkn.tt(), &tt), (TokenType::Number(_), TokenType::Number(_))) || + matches!((tkn.tt(), &tt), (TokenType::Char(_), TokenType::Char(_))) + { + debug!("check: {}", tkn); + return Some(tkn); + } + } + None +} +pub fn check_2_last(tokens: &Vec, tt: TokenType) -> Option<&Token> { + if tokens.len() < 2 { + return None + } + if let Some(tkn) = tokens.get(tokens.len() - 2) { + if tkn.tt() == &tt || + // ignore internal values if searching for these + matches!((tkn.tt(), &tt), (TokenType::Ident(_), TokenType::Ident(_))) || + matches!((tkn.tt(), &tt), (TokenType::String(_), TokenType::String(_))) || + matches!((tkn.tt(), &tt), (TokenType::Number(_), TokenType::Number(_))) || + matches!((tkn.tt(), &tt), (TokenType::Char(_), TokenType::Char(_))) + { + return Some(tkn); + } + } + None +} + +pub fn check_consume(tokens: &mut Vec, tt: TokenType) -> Option { + if let Some(tkn) = tokens.last() { + if tkn.tt() == &tt || + // ignore internal values if searching for these + matches!((tkn.tt(), &tt), (TokenType::Ident(_), TokenType::Ident(_))) || + matches!((tkn.tt(), &tt), (TokenType::String(_), TokenType::String(_))) || + matches!((tkn.tt(), &tt), (TokenType::Number(_), TokenType::Number(_))) || + matches!((tkn.tt(), &tt), (TokenType::Char(_), TokenType::Char(_))) + { + *LAST_LOC.lock().expect("Could not lock LAST_LOC") = tkn.loc().clone(); + debug!("check_consume: {}", tokens.last()?); + return Some(tokens.pop()?); + } + } + None +} + +pub fn check_consume_or_err(tokens: &mut Vec, tt: TokenType, err_msg: &'static str) -> anyhow::Result { + if let Some(tkn) = tokens.last() { + if tkn.tt() == &tt || + // ignore internal values if searching for these + matches!((tkn.tt(), &tt), (TokenType::Ident(_), TokenType::Ident(_))) || + matches!((tkn.tt(), &tt), (TokenType::String(_), TokenType::String(_))) || + matches!((tkn.tt(), &tt), (TokenType::Number(_), TokenType::Number(_))) || + matches!((tkn.tt(), &tt), (TokenType::Char(_), TokenType::Char(_))) + { + *LAST_LOC.lock().expect("Could not lock LAST_LOC") = tkn.loc().clone(); + return Ok(tokens.pop().expect("Unreachable")); + } else { + lerror!(tkn.loc(), "Expected: '{tt}', got: '{}': {err_msg}", tkn.tt()); + //anyhow::bail!(format!("{}: ERROR: Expected: '{tt:?}', got: '{:?}': {err_msg}", tkn.loc(), tkn.tt())) + 
anyhow::bail!("") + } + } + let loc = LAST_LOC.lock().expect("Could not lock LAST_LOC"); + lerror!(&loc, "Expected: '{tt}', got: '(empty)': {err_msg}"); + // anyhow::bail!(format!("{loc}: ERROR: Expected '{tt:?}', got (empty): {err_msg}")) + anyhow::bail!("") +} + +pub fn check_consume_from_many(tokens: &mut Vec, tts: &[TokenType]) -> Option { + for tt in tts { + if let Some(tkn) = check_consume(tokens, tt.clone()) { + return Some(tkn); + } + } + None +} + +pub fn check_from_many<'a>(tokens: &'a mut Vec, tts: &[TokenType]) -> Option<&'a Token> { + for tt in tts { + if let Some(tkn) = check(tokens, tt.clone()) { + return Some(tkn); + } + } + None +} + +pub fn check_consume_or_err_from_many(tokens: &mut Vec, tts: &[TokenType], err_msg: &'static str) -> anyhow::Result { + if let Some(tkn) = tokens.last() { + for tt in tts { + if tkn.tt() == tt || + // ignore internal values if searching for these + matches!((tkn.tt(), &tt), (TokenType::Ident(_), TokenType::Ident(_))) || + matches!((tkn.tt(), &tt), (TokenType::String(_), TokenType::String(_))) || + matches!((tkn.tt(), &tt), (TokenType::Number(_), TokenType::Number(_))) || + matches!((tkn.tt(), &tt), (TokenType::Char(_), TokenType::Char(_))) + { + *LAST_LOC.lock().expect("Could not lock LAST_LOC") = tkn.loc().clone(); + return Ok(tokens.pop().expect("Unreachable")); + } + } + lerror!(tkn.loc(), "Expected: '{tts:?}', got: '{}': {err_msg}", tkn.tt()); + anyhow::bail!("") + } + let loc = LAST_LOC.lock().expect("Could not lock LAST_LOC"); + lerror!(&loc, "Expected: '{tts:?}', got: '(empty)': {err_msg}"); + anyhow::bail!("") +} + +pub fn get_last_loc() -> Loc { + LAST_LOC.lock().expect("Could not lock LAST_LOC").clone() +} diff --git a/src/tokeniser/mod.rs b/src/tokeniser/mod.rs new file mode 100644 index 0000000..45d0b0d --- /dev/null +++ b/src/tokeniser/mod.rs @@ -0,0 +1,292 @@ +use std::{collections::HashMap, fmt::Display}; +use anyhow::bail; +use parse_int::parse; +use crate::{common::{loc::LocIncr, Loc}, error, lerror}; + +pub mod tokentype; +use tokentype::*; + +#[derive(Debug, Clone)] +pub struct Token { + loc: Loc, + tt: TokenType, +} + +impl Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}: {:?}", self.loc(), self.tt()) + } +} + +impl Token { + fn new(tt: TokenType, loc: &Loc) -> Self { + Self { + tt, loc: loc.clone() + } + } + pub fn loc(&self) -> &Loc { + &self.loc + } + pub fn tt(&self) -> &TokenType { + &self.tt + } +} + + +pub fn tokenise(s: &str) -> anyhow::Result> { + let mut loc = Loc::default(); + let mut tokens = Vec::new(); + let chars: Vec<_> = s.chars().collect(); + let mut chars = chars.iter().peekable(); + while let Some(c) = chars.next() { + loc.inc_col(); + match c { + ' ' | '\t' => (), + '/' if chars.peek() == Some(&&'/') => { + let mut buf = String::new(); + chars.next(); + while let Some(c) = chars.next_if(|c| !matches!(c, '\n' | '\r')) { + loc.inc_col(); + buf.push(*c); + } + // tokens.push(Token::new(TokenType::Comment(Comment::Line(buf.clone())), &loc)); + }, + '/' if chars.peek() == Some(&&'*') => { + let mut buf = String::new(); + chars.next(); + while let Some(c) = chars.peek() { + if matches!(c, '\n' | '\r') { + loc.inc_line(); + } else { + loc.inc_col(); + } + let c = *chars.next().expect("Unreachable"); + if c == '*' && matches!(chars.peek(), Some(&&'/') | None) { + chars.next(); + break; + } + buf.push(c); + } + // tokens.push(Token::new(TokenType::Comment(Comment::Line(buf.clone())), &loc)); + } + '\n' => loc.inc_line(), + '"' | '\'' | + 'c' if *c != 
'c' || chars.peek() == Some(&&'"') => { + let str_typ = *c; + let mut sc = *c; + if *c == 'c' { + sc = '"'; + chars.peek(); + } + let mut last = '\0'; + let mut buf = String::new(); + while let Some(c) = chars.next_if(|v| **v != '\n') { + loc.inc_col(); + if *c == sc && last != '\\' { + break; + } + buf.push(*c); + last = *c; + } + + match str_typ { + '"' => { + tokens.push(Token::new(TokenType::string(&buf, false), &loc)); + } + 'c' => { + tokens.push(Token::new(TokenType::string(&buf, true), &loc)); + } + '\'' => { + let buf = buf + .replace("\\n", "\n") + .replace("\\r", "\r"); + if buf.len() > 1 { + lerror!(&loc, "Chars can only have 1 byte"); + bail!("") + } + tokens.push(Token::new(TokenType::char(buf.chars().nth(0).unwrap()), &loc)); + } + _ => unreachable!() + } + } + 'a'..='z' | 'A'..='Z' | '_' => { + let mut buf = String::new(); + buf.push(*c); + while let Some(c) = chars.next_if(|v| matches!(**v, 'a'..='z' | 'A'..='Z' | '_' | '0'..='9')) { + loc.inc_col(); + buf.push(*c); + } + if let Some(kw) = TokenType::from_str(&buf) { + tokens.push(Token::new(kw, &loc)); + continue; + } + tokens.push(Token::new(TokenType::ident(&buf), &loc)); + buf.clear(); + }, + + '+' | '-' | '0'..='9' + // Checks if its a number an not an operator in disguise + if matches!(c, '0'..='9') || matches!(chars.peek(), Some('0'..='9')) => { + let mut buf = String::new(); + buf.push(*c); + let signed = *c == '-'; + let mut radix = 10; + match chars.peek() { + Some(v) => { + match v { + 'x' => radix = 16, + 'b' => radix = 2, + 'o' => radix = 8, + _ => (), + } + }, + None => { + tokens.push(Token::new(TokenType::number(parse(&buf).unwrap(), radix, signed), &loc)); + } + } + while let Some(c) = chars.next_if(|v| matches!(**v, '0'..='9' | '.' | 'a'..='f' | 'A'..='F')) { + loc.inc_col(); + buf.push(*c); + } + match radix { + 2 => { + if buf.strip_prefix("0b").expect("Unreachable") + .chars().filter(|v| !matches!(v, '0' | '1')).collect::>().len() > 0 { + lerror!(&loc, "Invalid character in binary number"); + bail!("") + } + tokens.push(Token::new(TokenType::number(parse(&buf).unwrap(), radix, signed), &loc)); + } + 8 => { + if buf.strip_prefix("0o").expect("Unreachable") + .chars().filter(|v| !matches!(v, '0'..='7')).collect::>().len() > 0 { + lerror!(&loc, "Invalid character in octal number"); + bail!("") + } + tokens.push(Token::new(TokenType::number(parse(&buf).unwrap(), radix, false), &loc)); + } + 10 => { + if buf.chars().filter(|v| !matches!(v, '0'..='9' | '.')).collect::>().len() > 0 { + lerror!(&loc, "Invalid character in decimal number"); + bail!("") + } + if buf.contains(".") { + if buf.chars().filter(|v| *v == '.').collect::>().len() > 1 { + lerror!(&loc, "Floats cant have more than 1 dot"); + } + todo!() + } + tokens.push(Token::new(TokenType::number(parse(&buf).unwrap(), radix, signed), &loc)); + } + 16 => { + if buf.strip_prefix("0x").expect("Unreachable") + .chars().filter(|v| !matches!(v, '0'..='9' | 'a'..='f' | 'A'..='F')).collect::>().len() > 0 { + lerror!(&loc, "Invalid character in hex number"); + bail!("") + } + tokens.push(Token::new(TokenType::number(parse(&buf).unwrap(), radix, false), &loc)); + } + _ => unreachable!() + } + + buf.clear(); + }, + + _ => { + let mut buf = String::new(); + buf.push(*c); + while let Some(c) = chars.peek() { + if let None = TokenType::from_str(&format!("{buf}{c}")) { + break; + } + if let Some(c) = chars.next() { + buf.push(*c); + } + } + if let Some(tt) = TokenType::from_str(&buf) { + tokens.push(Token::new(tt, &loc)); + } else { + lerror!(&loc, "Unknown token: 
{buf}"); + } + } + } + } + tokens.reverse(); + Ok(tokens) +} + + + +// Lookup table for all tokens, fast for normal tokenisation, +// but slower for reveres lookup (for like error messages) +lazy_static::lazy_static!( + static ref TT: HashMap<&'static str, TokenType> = [ + ("fn", TokenType::Keyword(Keyword::Fn)), + ("if", TokenType::Keyword(Keyword::If)), + ("else", TokenType::Keyword(Keyword::Else)), + ("struct", TokenType::Keyword(Keyword::Struct)), + ("enum", TokenType::Keyword(Keyword::Enum)), + ("type", TokenType::Keyword(Keyword::Type)), + ("while", TokenType::Keyword(Keyword::While)), + ("for", TokenType::Keyword(Keyword::For)), + ("break", TokenType::Keyword(Keyword::Break)), + ("continue", TokenType::Keyword(Keyword::Continue)), + ("let", TokenType::Keyword(Keyword::Let)), + ("const", TokenType::Keyword(Keyword::Const)), + ("mut", TokenType::Keyword(Keyword::Mut)), + ("static", TokenType::Keyword(Keyword::Static)), + ("true", TokenType::Keyword(Keyword::True)), + ("false", TokenType::Keyword(Keyword::False)), + ("include", TokenType::Keyword(Keyword::Include)), + ("extern", TokenType::Keyword(Keyword::Extern)), + ("return", TokenType::Keyword(Keyword::Return)), + ("loop", TokenType::Keyword(Keyword::Loop)), + ("as", TokenType::Keyword(Keyword::As)), + ("{", TokenType::Delim(Delimiter::CurlyL)), + ("}", TokenType::Delim(Delimiter::CurlyR)), + ("[", TokenType::Delim(Delimiter::SquareL)), + ("]", TokenType::Delim(Delimiter::SquareR)), + ("(", TokenType::Delim(Delimiter::ParenL)), + (")", TokenType::Delim(Delimiter::ParenR)), + (";", TokenType::Punct(Punctuation::Semi)), + (":", TokenType::Punct(Punctuation::Colon)), + ("::", TokenType::Punct(Punctuation::Pathsep)), + ("->", TokenType::Punct(Punctuation::Arrow)), + ("=>", TokenType::Punct(Punctuation::FatArrow)), + ("+", TokenType::Punct(Punctuation::Plus)), + ("-", TokenType::Punct(Punctuation::Minus)), + (",", TokenType::Punct(Punctuation::Comma)), + ("&", TokenType::Punct(Punctuation::Ampersand)), + ("*", TokenType::Punct(Punctuation::Star)), + ("!", TokenType::Punct(Punctuation::Not)), + ("/", TokenType::Punct(Punctuation::Div)), + ("%", TokenType::Punct(Punctuation::Mod)), + ("<<", TokenType::Punct(Punctuation::Shl)), + (">>", TokenType::Punct(Punctuation::Shr)), + ("&&", TokenType::Punct(Punctuation::AndAnd)), + ("||", TokenType::Punct(Punctuation::OrOr)), + ("|", TokenType::Punct(Punctuation::Or)), + (">", TokenType::Punct(Punctuation::Gt)), + ("<", TokenType::Punct(Punctuation::Lt)), + (">=", TokenType::Punct(Punctuation::Ge)), + ("<=", TokenType::Punct(Punctuation::Le)), + ("^", TokenType::Punct(Punctuation::Xor)), + ("+=", TokenType::Punct(Punctuation::AddEq)), + ("-=", TokenType::Punct(Punctuation::SubEq)), + ("/=", TokenType::Punct(Punctuation::DivEq)), + ("*=", TokenType::Punct(Punctuation::MulEq)), + ("%=", TokenType::Punct(Punctuation::ModEq)), + ("<<=", TokenType::Punct(Punctuation::ShlEq)), + (">>=", TokenType::Punct(Punctuation::ShrEq)), + ("&=", TokenType::Punct(Punctuation::AndEq)), + ("|=", TokenType::Punct(Punctuation::OrEq)), + ("^=", TokenType::Punct(Punctuation::XorEq)), + ("=", TokenType::Punct(Punctuation::Eq)), + ("==", TokenType::Punct(Punctuation::EqEq)), + ("!=", TokenType::Punct(Punctuation::Neq)), + (".", TokenType::Punct(Punctuation::Fieldaccess)), + ("::", TokenType::Punct(Punctuation::Pathaccess)), + ].into(); +); + + diff --git a/src/tokeniser/tokentype.rs b/src/tokeniser/tokentype.rs new file mode 100644 index 0000000..fc8baca --- /dev/null +++ b/src/tokeniser/tokentype.rs @@ -0,0 +1,215 @@ +use 
core::panic; +use std::fmt::Display; + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Ident(pub String); + +impl ToString for Ident { + fn to_string(&self) -> String { + self.0.clone() + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Number { + pub val: usize, + pub base: u8, + pub signed: bool, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub struct TString { + pub val: String, + pub cstr: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Char(char); + +impl Into for Char { + fn into(self) -> char { + self.0 + } +} + +impl From for Char { + fn from(value: char) -> Self { + Char(value) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Keyword { + Fn, If, Else, Struct, Enum, + Type, While, For, Break, Continue, + Let, Const, Mut, Static, + True, False, Include, Extern, Return, + As, Loop +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Delimiter { + CurlyL, CurlyR, + SquareL, SquareR, + ParenL, ParenR, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Punctuation { + Semi, Colon, Pathsep, Comma, + Arrow, FatArrow, Plus, Minus, + Ampersand, Star, Div, + Mod, Shl, Shr, AndAnd, + OrOr, Or, Xor, Not, + AddEq, SubEq, + DivEq, MulEq, + ModEq, ShlEq, + ShrEq, AndEq, + OrEq, XorEq, + Eq, EqEq, Fieldaccess, + Pathaccess, Lt, Gt, Le, Ge, Neq + +} + +impl Punctuation { + + // pls help + pub fn precedence(&self) -> Option<(usize, usize)> { + match self { + Punctuation::AddEq | + Punctuation::SubEq | + Punctuation::DivEq | + Punctuation::MulEq | + Punctuation::ModEq | + Punctuation::ShlEq | + Punctuation::ShrEq | + Punctuation::AndEq | + Punctuation::OrEq | + Punctuation::XorEq | + Punctuation::Eq => Some((1, 2)), + + Punctuation::EqEq | + Punctuation::Neq => Some((3, 4)), + + Punctuation::Div | + Punctuation::Star | + Punctuation::Mod => Some((5,6)), + + Punctuation::Plus | + Punctuation::Minus => Some((7,8)), + + Punctuation::Shl | + Punctuation::Shr => Some((9,10)), + + Punctuation::Lt | + Punctuation::Gt | + Punctuation::Le | + Punctuation::Ge => Some((11, 12)), + + Punctuation::Ampersand => Some((13, 14)), + Punctuation::Xor => Some((15, 16)), + Punctuation::Or => Some((17, 18)), + Punctuation::AndAnd => Some((19, 20)), + Punctuation::OrOr => Some((21, 22)), + + _ => None + } + + } +} + +#[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Ord, Eq)] +pub enum TokenType { + Ident(Ident), + Number(Number), + String(TString), + Char(Char), + Keyword(Keyword), + Delim(Delimiter), + Punct(Punctuation), + Comment(Comment), +} + +#[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Ord, Eq)] +pub enum Comment { + Line(String), + Block(String) +} + +impl TokenType { + pub fn unwrap_ident(&self) -> Ident { + match self { + Self::Ident(i) => i.clone(), + _ => panic!("Expected {}, got {self}", Self::ident("")) + } + } + pub fn ident(s: &str) -> Self { + Self::Ident(Ident(s.to_string())) + } + pub fn number(val: usize, base: u8, signed: bool) -> Self { + Self::Number(Number { val, base, signed }) + } + pub fn string(s: &str, cstr: bool) -> Self{ + Self::String(TString { val: s.to_string(), cstr }) + } + pub fn char(v: char) -> Self { + Self::Char(Char(v)) + } + pub fn from_str(s: &str) -> Option { + super::TT.get(s).cloned() + } + pub fn to_str(&self) -> String { + for (k, v) in super::TT.iter() { + if v == self { + return k.to_string(); + } + } + + match self { + 
+            TokenType::Ident(s) => {
+                return format!("Ident(\"{}\")", s.to_string());
+            },
+            TokenType::Number(num) => {
+                match num.base {
+                    2 => {
+                        assert!(!num.signed, "base 2 (binary) numbers physically cannot be signed");
+                        format!("{:#b}", num.val)
+                    }
+                    8 => {
+                        assert!(!num.signed, "base 8 (octal) numbers physically cannot be signed");
+                        format!("{:#o}", num.val)
+                    }
+                    10 => {
+                        if num.signed {
+                            format!("{}", num.val as isize)
+                        } else {
+                            format!("{}", num.val)
+                        }
+                    }
+                    16 => {
+                        assert!(!num.signed, "base 16 (hex) numbers physically cannot be signed");
+                        format!("{:#x}", num.val)
+                    }
+                    _ => panic!("Invalid base for number, {}", num.base),
+                }
+            },
+            TokenType::String(s) => {
+                if s.cstr {
+                    format!("\"{}\\0\"", s.val)
+                } else {
+                    format!("\"{}\"", s.val)
+                }
+            },
+            TokenType::Char(c) => {
+                format!("'{}'", c.0)
+            }
+            _ => unreachable!("Unreachable, did you add a new token and forget to add reverse lookup?"),
+        }
+    }
+}
+
+impl Display for TokenType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.to_str())
+    }
+}
diff --git a/src/validator/mod.rs b/src/validator/mod.rs
new file mode 100644
index 0000000..934612e
--- /dev/null
+++ b/src/validator/mod.rs
@@ -0,0 +1,8 @@
+use crate::parser::ast::Program;
+
+
+
+pub fn validate_code(prog: &Program) -> anyhow::Result<()> {
+
+    Ok(())
+}
diff --git a/src/validator/predefined.rs b/src/validator/predefined.rs
new file mode 100644
index 0000000..e69de29
diff --git a/test.mcl b/test.mcl
new file mode 100644
index 0000000..54a4b8f
--- /dev/null
+++ b/test.mcl
@@ -0,0 +1,86 @@
+
+enum Wah {
+    A,
+    B,
+    C,
+    D
+}
+
+struct Baz {
+    owo: i32,
+    uwu: usize
+}
+
+/// Type definitions
+// type Rah = &Baz;
+
+/// Different kinds of functions
+// Normal function
+// fn main(a: &Foo, b: Rah) -> Nya {
+    /// General expressions (math)
+    // Works
+    // let a = 1 * 3 == 4;
+    // let b = 3/4 == *a;
+    // let c = (a->b.c->d) / 2;
+    // let d = 2 / a->b.c->d;
+    // let e = a->b.c->d / 2;
+    // let f = a.b.c.d / 2;
+    // let g = a.b[a.c] * 5;
+
+    // No worky
+    // nothing! yay!
+
+    /// Struct literals
+    // let a = Baz {
+    //     owo: a,
+    //     uwu: b + c / d
+    // };
+
+    /// If statement
+    // if 1 > 3 {
+    //     ";3"
+    // } else
+    // if *a == 3 {
+    //     ":0"
+    // } else {
+    //     ">:("
+    // }
+
+
+    /// 3 kinds of loops all doing the same thing
+    /// While loops
+    // let iw = 0;
+    // while iw < 10 {
+    //     println("Owo");
+    // }
+
+    /// For loops
+    // for let ifr = 0 ; ifr < 20 ; ifr += 1 {
+    //     println("nya");
+    // }
+
+    /// Infinite loops
+    // let il = 0;
+    // loop {
+    //     if il > 10 {
+    //         break;
+    //     }
+    //     println("Rah");
+    // }
+
+    /// Function Calls
+    // println(":3");
+//}
+
+// Struct member function with inner data
+//fn Baz.main(self: &mut Baz, a: &Foo, b: &mut Bar) -> &Nya;
+
+// Struct member function without any data a.k.a a static member func
+//fn Baz.main(a: &Foo, b: &mut Bar) -> &Nya;
+
+
+
+
+
+
+
diff --git a/tests/parser/enums.mcl b/tests/parser/enums.mcl
new file mode 100644
index 0000000..e69de29
diff --git a/tests/parser/expr.mcl b/tests/parser/expr.mcl
new file mode 100644
index 0000000..e69de29
diff --git a/tests/parser/fn.mcl b/tests/parser/fn.mcl
new file mode 100644
index 0000000..e69de29
diff --git a/tests/parser/if.mcl b/tests/parser/if.mcl
new file mode 100644
index 0000000..e69de29
diff --git a/tests/parser/loops.mcl b/tests/parser/loops.mcl
new file mode 100644
index 0000000..e69de29
diff --git a/tests/parser/structs.mcl b/tests/parser/structs.mcl
new file mode 100644
index 0000000..e69de29
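
A minimal usage sketch (not part of the patch itself) of how the tokeniser above is intended to feed the parser helpers in src/parser/utils.rs. It assumes the crate name `mclangc` from Cargo.toml and that src/lib.rs exposes the `tokeniser` module publicly; adjust the paths if the actual re-exports differ.

    // Illustrative sketch only -- module paths are assumptions, see note above.
    use mclangc::tokeniser::{tokenise, tokentype::{Keyword, TokenType}};

    fn main() {
        // tokenise() reverses the token list before returning it, so the
        // parser can treat the Vec like a stack and pop() off the back.
        let mut tokens = tokenise("fn main() { }").expect("tokenise failed");

        // The last element is therefore the first token of the source text.
        assert_eq!(tokens.pop().map(|t| t.tt().clone()),
                   Some(TokenType::Keyword(Keyword::Fn)));
        assert_eq!(tokens.pop().map(|t| t.tt().clone()),
                   Some(TokenType::ident("main")));
    }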