From 9625256554f00008cd776abafc91a0294be739c6 Mon Sep 17 00:00:00 2001 From: MCorange Date: Sun, 23 Apr 2023 17:51:05 +0300 Subject: [PATCH] Added cstrings --- Cargo.lock | 2 +- Cargo.toml | 4 +++ README.md | 6 +++++ include/fs.mcl | 6 +++++ include/io.mcl | 19 +++++++++++--- src/compile/linux_x86_64.rs | 26 ++++++++++++-------- src/config.rs | 4 +-- src/constants.rs | 8 ++++-- src/interpret/linux_x86_64/mod.rs | 21 ++++++++++++++-- src/lexer.rs | 41 +++++++++++++++++++++++-------- src/main.rs | 2 +- src/parser.rs | 8 ++++-- src/preprocessor.rs | 36 ++++++++++++++++++--------- src/typechecker.rs | 5 +++- test.mcl | 15 ++++------- 15 files changed, 146 insertions(+), 57 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 54e6f9f..8623e04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -210,7 +210,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" [[package]] -name = "mclang" +name = "mclangc" version = "0.1.0" dependencies = [ "clap", diff --git a/Cargo.toml b/Cargo.toml index 89dcf10..1dbe310 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,11 @@ [package] name = "mclangc" +description="The McLang Programming language compiler" version = "0.1.0" edition = "2021" +authors=[ + "MCorange (https://mcorangehq.xyz/)" +] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] diff --git a/README.md b/README.md index e1eb630..e861a0b 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,12 @@ This is the second revision of [MCLang](https://github.com/mc-lang/mclang) now w The docs are currently are just made in MarkDown. 
You can find the docs [here](/docs/index.md) +## Cheatsheet + +Useful things that I search for a lot in the source code, so I added them here + +Syscall arg order: \[rax, rdi, rsi, rdx, r10, r8, r9\] + ## Credits [MCotange](https://github.com/MCorange99) - The one and only me, the creator and current maintainer or mclang rev1 and rev2 diff --git a/include/fs.mcl b/include/fs.mcl index 3268996..f84828e 100644 --- a/include/fs.mcl +++ b/include/fs.mcl @@ -16,3 +16,9 @@ const FS_O_PATH 2097152 end // open descriptor for obtaining permissions and sta const FS_O_SYNC 1052672 end // wait for IO to complete before returning const FS_O_TMPFILE 4259840 end // create an unnamed, unreachable (via any other open call) temporary file const FS_O_TRUNC 512 end // if file exists, ovewrite it (careful!) + + +fn fs_read_to_string with int ptr returns int ptr then + + +done \ No newline at end of file diff --git a/include/io.mcl b/include/io.mcl index 22f0b43..a35ca6f 100644 --- a/include/io.mcl +++ b/include/io.mcl @@ -5,7 +5,7 @@ // @arg buff_ptr: Ptr - pointer to the buffer to write // @arg fd: Int - file descriptor // @ret Int -inline fn write with int ptr int returns int then +inline fn fwrite with int ptr int returns int then SYS_write syscall3 done @@ -15,18 +15,29 @@ done // @arg buff_ptr: Ptr - pointer to the buffer to write // @arg fd: Int - file descriptor // @ret Int -inline fn read with int ptr int returns int then +inline fn fread with int ptr int returns int then SYS_read syscall3 done +// Open a file using the SYS_open syscall +// args: [buff_ptr, flags, mode] +// @arg buff_ptr: Ptr - File to open +// @arg flags: Int - Flags +// @arg mode: Int - Mode +// @ret Int - Fd +inline fn fopen with int ptr int returns int then + SYS_open syscall3 +done + + // Print a string to STDOUT // args: [str_size, str_ptr] // @arg buff_size: Int - number of bytes to write // @arg buff_ptr: Ptr - pointer to the buffer to write // @ret NULL inline fn puts with int ptr 
returns void then - STDOUT write drop + STDOUT fwrite drop done // Print a string to STDERR @@ -35,7 +46,7 @@ done // @arg buff_ptr: Ptr - pointer to the buffer to write // @ret NULL inline fn eputs with int ptr returns void then - STDOUT write drop + STDOUT fwrite drop done // TODO: make putc and eputc after we make local mem diff --git a/src/compile/linux_x86_64.rs b/src/compile/linux_x86_64.rs index 572caf7..1c3b120 100644 --- a/src/compile/linux_x86_64.rs +++ b/src/compile/linux_x86_64.rs @@ -72,20 +72,18 @@ pub fn compile(tokens: &[Operator], args: &Args) -> Result{ writeln!(writer, " add rsp, 40")?; writeln!(writer, " ret")?; - if crate::config::ENABLE_EXPORTED_FUNCTIONS && !args.lib_mode { + if !crate::config::ENABLE_EXPORTED_FUNCTIONS && !args.lib_mode { writeln!(writer, "global _start")?; writeln!(writer, "_start:")?; writeln!(writer, " lea rbp, [rel ret_stack]")?; writeln!(writer, " call main")?; writeln!(writer, " jmp end")?; - } let mut ti = 0; while ti < tokens.len() { let token = &tokens[ti]; - // println!("{:?}", token); if debug { writeln!(writer, "addr_{ti}:")?; if token.typ == OpType::Instruction(InstructionType::PushInt) { @@ -116,8 +114,8 @@ pub fn compile(tokens: &[Operator], args: &Args) -> Result{ _ => () } } - } + match token.typ.clone() { // stack @@ -136,6 +134,13 @@ pub fn compile(tokens: &[Operator], args: &Args) -> Result{ strings.push(token.text.clone()); ti += 1; } + InstructionType::PushCStr => { + writeln!(writer, " push rax")?; + writeln!(writer, " mov rax, str_{}", strings.len())?; + writeln!(writer, " push rax")?; + strings.push(token.text.clone()); + ti += 1; + } InstructionType::Drop => { writeln!(writer, " pop rax")?; ti += 1; @@ -198,7 +203,7 @@ pub fn compile(tokens: &[Operator], args: &Args) -> Result{ InstructionType::Load32 => { writeln!(writer, " pop rax")?; writeln!(writer, " xor rbx, rbx")?; - writeln!(writer, " mov bl, dword [rax]")?; + writeln!(writer, " mov ebx, dword [rax]")?; writeln!(writer, " push rbx")?; ti += 
1; } @@ -206,13 +211,13 @@ pub fn compile(tokens: &[Operator], args: &Args) -> Result{ InstructionType::Store32 => { writeln!(writer, " pop rbx")?; writeln!(writer, " pop rax")?; - writeln!(writer, " mov dword[rax], bl")?; + writeln!(writer, " mov dword[rax], ebx")?; ti += 1; } InstructionType::Load64 => { writeln!(writer, " pop rax")?; writeln!(writer, " xor rbx, rbx")?; - writeln!(writer, " mov bl, qword [rax]")?; + writeln!(writer, " mov rbx, qword [rax]")?; writeln!(writer, " push rbx")?; ti += 1; } @@ -220,7 +225,7 @@ pub fn compile(tokens: &[Operator], args: &Args) -> Result{ InstructionType::Store64 => { writeln!(writer, " pop rbx")?; writeln!(writer, " pop rax")?; - writeln!(writer, " mov qword [rax], bl")?; + writeln!(writer, " mov qword [rax], rbx")?; ti += 1; } @@ -421,6 +426,7 @@ pub fn compile(tokens: &[Operator], args: &Args) -> Result{ }, InstructionType::Return => { + // Experimental feature exported functions if crate::config::ENABLE_EXPORTED_FUNCTIONS && should_push_ret { writeln!(writer, " pop rdx")?; should_push_ret = false; @@ -479,7 +485,7 @@ pub fn compile(tokens: &[Operator], args: &Args) -> Result{ } KeywordType::End => { if ti + 1 != token.jmp { - // writeln!(writer, " jmp addr_{}", token.jmp)?; + writeln!(writer, " jmp addr_{}", token.jmp)?; } ti += 1; }, @@ -584,7 +590,7 @@ pub fn compile(tokens: &[Operator], args: &Args) -> Result{ } } writeln!(writer, "addr_{ti}:")?; - if crate::config::ENABLE_EXPORTED_FUNCTIONS && !args.lib_mode { + if !crate::config::ENABLE_EXPORTED_FUNCTIONS && !args.lib_mode { writeln!(writer, "end:")?; writeln!(writer, " mov rax, 60")?; writeln!(writer, " mov rdi, 0")?; diff --git a/src/config.rs b/src/config.rs index 214c4be..1334086 100644 --- a/src/config.rs +++ b/src/config.rs @@ -4,9 +4,9 @@ pub const DEV_MODE: bool = false; pub const DEFAULT_OUT_FILE: &str = "a.out"; -pub const DEFAULT_INCLUDES: [&str;1] = [ +pub const DEFAULT_INCLUDES: [&str;2] = [ "./include", - // "~/.mclang/include", + 
"~/.mclang/include", ]; diff --git a/src/constants.rs b/src/constants.rs index 9027aa6..21da70c 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -7,6 +7,7 @@ pub enum InstructionType { // stack PushInt, PushStr, + PushCStr, Drop, Print, Dup, @@ -131,7 +132,7 @@ impl Operator { // self.types = (args, rets); // (*self).clone() // } - + } impl OpType { @@ -139,9 +140,10 @@ impl OpType { match (*self).clone() { OpType::Instruction(instruction) => { match instruction { - + InstructionType::PushInt => "Number", InstructionType::PushStr => "String", + InstructionType::PushCStr => "CString", InstructionType::Print => "_dbg_print", InstructionType::Dup => "dup", InstructionType::Drop => "drop", @@ -235,6 +237,7 @@ pub enum TokenType { Word, Int, String, + CString, Char } @@ -254,6 +257,7 @@ impl TokenType { TokenType::Word => "Word", TokenType::Int => "Int", TokenType::String => "String", + TokenType::CString => "CString", TokenType::Char => "Char" }.to_string() } diff --git a/src/interpret/linux_x86_64/mod.rs b/src/interpret/linux_x86_64/mod.rs index 62c686e..99a636d 100644 --- a/src/interpret/linux_x86_64/mod.rs +++ b/src/interpret/linux_x86_64/mod.rs @@ -48,7 +48,7 @@ pub fn run(ops: &[crate::constants::Operator]) -> Result{ ip += 1; }, InstructionType::PushStr => { - if op.addr.is_none() { + if op.addr.is_none() { stack.push(op.text.len()); // string len stack.push(string_idx + crate::MEM_SZ); @@ -64,6 +64,23 @@ pub fn run(ops: &[crate::constants::Operator]) -> Result{ } + ip += 1; + }, + InstructionType::PushCStr => { + if op.addr.is_none() { + stack.push(string_idx + crate::MEM_SZ); + + for c in op.text.bytes() { + mem[crate::MEM_SZ + string_idx] = u64::from(c); + string_idx += 1; + } + } else { + if let Some(addr) = op.addr { + stack.push(addr); + } + } + + ip += 1; }, InstructionType::Drop => { @@ -113,7 +130,7 @@ pub fn run(ops: &[crate::constants::Operator]) -> Result{ InstructionType::Load32 | InstructionType::Load64 => { let a = stack_pop(&mut stack, 
&pos)?; - if a > crate::MEM_SZ { + if a > crate::MEM_SZ + crate::STRING_SZ { lerror!(&op.loc, "Invalid memory address {a}"); return Ok(1); } diff --git a/src/lexer.rs b/src/lexer.rs index 4943395..5864ec4 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -12,6 +12,9 @@ fn lex_word(s: String, tok_type: TokenType) -> (TokenType, String) { s if tok_type == TokenType::String => { (TokenType::String, s) } + s if tok_type == TokenType::CString => { + (TokenType::CString, s) + } s if tok_type == TokenType::Char => { (TokenType::Char, s) } @@ -72,17 +75,35 @@ fn lex_line(text: &str) -> Vec<(usize, String, TokenType)> { } else { - col_end = find_col(text, col, |x, _| x.is_whitespace()); - let t = &text[col..col_end]; - - if t == "//" { - return tokens; + if &text[col..=col] == "c" && text.len() - 1 + col > 0 && &text[col+1..=col+1] == "\"" { + col_end = find_col(text, col + 2, |x, x2| x == '"' && x2 != '\\'); + let t = &text[(col + 2)..col_end]; + let mut t = t.replace("\\n", "\n") + .replace("\\t", "\t") + .replace("\\r", "\r") + .replace("\\\'", "\'") + .replace("\\\"", "\"") + .replace("\\0", "\0"); + + if !t.is_empty() { + t.push('\0'); + tokens.push((col, t.to_string(), TokenType::CString)); + } + col = find_col(text, col_end + 1, |x, _| !x.is_whitespace()); + + } else { + col_end = find_col(text, col, |x, _| x.is_whitespace()); + let t = &text[col..col_end]; + + if t == "//" { + return tokens; + } + + if !t.is_empty() { + tokens.push((col, t.to_string(), TokenType::Word)); + } + col = find_col(text, col_end, |x, _| !x.is_whitespace()); } - - if !t.is_empty() { - tokens.push((col, t.to_string(), TokenType::Word)); - } - col = find_col(text, col_end, |x, _| !x.is_whitespace()); } } tokens diff --git a/src/main.rs b/src/main.rs index 9de7469..2dd5c07 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,7 +19,7 @@ use color_eyre::Result; use eyre::eyre; #[derive(Parser, Debug, Clone)] -#[command(author, version, about, long_about = None)] 
+#[command(author=env!("CARGO_PKG_AUTHORS"), version=env!("CARGO_PKG_VERSION"), about=env!("CARGO_PKG_DESCRIPTION"), long_about=env!("CARGO_PKG_DESCRIPTION"))] pub struct Args { /// Input source file #[arg(long, short)] diff --git a/src/parser.rs b/src/parser.rs index 61ffe43..971cacd 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -70,7 +70,8 @@ pub fn cross_ref(mut program: Vec) -> Result> { } if !stack.is_empty() { // println!("{:?}", stack); - lerror!(&program[stack.pop().expect("Empy stack")].clone().loc,"Unclosed block, {:?}", program[stack.pop().expect("Empy stack")].clone()); + let i = stack.pop().expect("Empy stack"); + lerror!(&program[i].clone().loc,"Unclosed block, {:?}", program[i].clone()); return Err(eyre!("Unclosed block")); } @@ -120,7 +121,10 @@ impl<'a> Parser<'a> { }, TokenType::String => { tokens.push(Operator::new(OpType::Instruction(InstructionType::PushStr), token.typ, 0, token.text.clone(), token.file.clone(), token.line, token.col)); - } + }, + TokenType::CString => { + tokens.push(Operator::new(OpType::Instruction(InstructionType::PushCStr), token.typ, 0, token.text.clone(), token.file.clone(), token.line, token.col)); + }, TokenType::Char => { let c = token.text.clone(); if c.len() != 1 { diff --git a/src/preprocessor.rs b/src/preprocessor.rs index 360e47b..f4e8993 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -45,6 +45,7 @@ pub struct Preprocessor<'a> { pub functions: Functions, pub memories: Memories, pub constants: Constants, + pub in_function: Option, args: &'a Args } @@ -57,6 +58,7 @@ impl<'a> Preprocessor<'a> { functions: HashMap::new(), memories: HashMap::new(), constants: HashMap::new(), + in_function: None } } @@ -65,7 +67,7 @@ impl<'a> Preprocessor<'a> { // println!("pre: has do tokens: {:?}", self.program.iter().map(|t| if t.typ == OpType::Keyword(KeywordType::Do) {Some(t)} else {None} ).collect::>>()); let mut f_inline = false; - let mut f_extern = false; + let mut f_export = false; let mut program: Vec = 
Vec::new(); @@ -94,7 +96,7 @@ impl<'a> Preprocessor<'a> { let mut include_code = String::new(); let mut pth = PathBuf::new(); - if include_path.text.chars().collect::>()[0] == '.' { + if include_path.text.chars().next().unwrap() == '.' { let p = Path::new(include_path.loc.0.as_str()); let p = p.parent().unwrap(); let p = p.join(&include_path.text); @@ -108,6 +110,7 @@ impl<'a> Preprocessor<'a> { if p.exists() { include_code = std::fs::read_to_string(p)?; + break; } } @@ -261,6 +264,9 @@ impl<'a> Preprocessor<'a> { } let mut pre = self.clone(); pre.program = prog; + if name.text.chars().next().unwrap() == '.' { + pre.in_function = Some(name.text[1..].to_string()); + } pre.preprocess()?; prog = pre.get_ops(); @@ -271,8 +277,8 @@ impl<'a> Preprocessor<'a> { tokens: Some(prog) }); - } else if f_extern { - f_extern = false; + } else if f_export { + f_export = false; self.functions.insert(name.text.clone(), Function{ loc: name.loc.clone(), name: name.text.clone(), @@ -341,9 +347,10 @@ impl<'a> Preprocessor<'a> { let mut name = rtokens.pop().unwrap(); // let mut should_warn = false; + - if let '0'..='9' = name.text.chars().next().unwrap() { - lerror!(&name.loc, "Constant name starts with a number which is not allowed"); + if let '0'..='9' | '.' 
= name.text.chars().next().unwrap() { + lerror!(&name.loc, "Constant name starts with a number or dot which is not allowed"); return Err(eyre!("")); } @@ -363,6 +370,7 @@ impl<'a> Preprocessor<'a> { } } } + // if should_warn { //TODO: add -W option in cli args to enable more warnings //lwarn!(&name.loc, "Constant name contains '(' or ')', this character is not supported but will be replaced with '__OP_PAREN__' or '__CL_PAREN__' respectively "); @@ -402,8 +410,8 @@ impl<'a> Preprocessor<'a> { } OpType::Keyword(KeywordType::Inline) => { - if f_extern { - lerror!(&op.loc, "Function is already marked as extern, function cannot be inline and extern at the same time"); + if f_export { + lerror!(&op.loc, "Function is already marked as exported, function cannot be inline and exported at the same time"); return Err(eyre!("")); } else if f_inline { lerror!(&op.loc, "Function is already marked as inline, remove this inline Keyword"); @@ -414,14 +422,18 @@ impl<'a> Preprocessor<'a> { } OpType::Keyword(KeywordType::Export) => { - if f_inline { - lerror!(&op.loc, "Function is already marked as inline, function cannot be inline and extern at the same time"); + if !crate::config::ENABLE_EXPORTED_FUNCTIONS { + lerror!(&op.loc, "Experimental feature Exported functions not enabled"); return Err(eyre!("")); - } else if f_extern { + } + if f_inline { + lerror!(&op.loc, "Function is already marked as inline, function cannot be inline and exported at the same time"); + return Err(eyre!("")); + } else if f_export { lerror!(&op.loc, "Function is already marked as extern, remove this extern Keyword"); return Err(eyre!("")); } else { - f_extern = true; + f_export = true; } } diff --git a/src/typechecker.rs b/src/typechecker.rs index 1d256b0..a7ca095 100644 --- a/src/typechecker.rs +++ b/src/typechecker.rs @@ -191,7 +191,10 @@ pub fn typecheck(ops: Vec, args: &Args, init_types: Option> InstructionType::PushStr => { stack.push(Types::Int); stack.push(Types::Ptr); - + }, + 
InstructionType::PushCStr => { + stack.push(Types::Int); + stack.push(Types::Ptr); }, InstructionType::Drop => { stack_pop(&mut stack, &op, &[Types::Any])?; diff --git a/test.mcl b/test.mcl index 4e424cf..a96f8fe 100644 --- a/test.mcl +++ b/test.mcl @@ -1,12 +1,7 @@ -// include "std.mcl" -fn mcl_print with int ptr returns void then - 1 1 syscall3 drop -done +include "std.mcl" -fn mcl_dump with int returns void then - _dbg_print -done +fn main with int ptr returns void then + // p l + c"bad, wait no, good\n\0" dup cstr_len swap puts -fn main with void returns void then - "hi\n" mcl_print -done +done \ No newline at end of file