From e63e9ef891374ac1085f8cd0a7e256c7b8d1dfc0 Mon Sep 17 00:00:00 2001 From: MCorange Date: Sat, 18 Mar 2023 22:46:11 +0200 Subject: [PATCH] implemented string literals --- src/compile/linux_x86_64.rs | 86 ++++++++++++++++++------------- src/compile/mod.rs | 1 + src/constants.rs | 13 +++-- src/interpret/linux_x86_64/mod.rs | 24 ++++++++- src/lexer.rs | 76 ++++++++++++++++++++------- src/parser.rs | 9 ++-- test.mcl | 2 +- tests/fail_unknown_word.mcl | 1 + 8 files changed, 146 insertions(+), 66 deletions(-) create mode 100644 tests/fail_unknown_word.mcl diff --git a/src/compile/linux_x86_64.rs b/src/compile/linux_x86_64.rs index 1b0be5c..00edbce 100644 --- a/src/compile/linux_x86_64.rs +++ b/src/compile/linux_x86_64.rs @@ -19,6 +19,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ let mut writer = BufWriter::new(&file); // println!("{}", tokens.len()); + let mut strings: Vec = Vec::new(); writeln!(writer, "BITS 64")?; writeln!(writer, "segment .text")?; @@ -67,27 +68,34 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ writeln!(writer, "addr_{}:", ti)?; match token.typ { // stack - OpType::Push => { - writeln!(writer, " ;; -- push {} --", token.value)?; + OpType::PushInt => { + writeln!(writer, " ;; -- push int {}", token.value)?; writeln!(writer, " mov rax, {}", token.value)?; writeln!(writer, " push rax")?; ti += 1; - }, + OpType::PushStr => { + writeln!(writer, " ;; -- push str \"{}\"", token.text)?; + writeln!(writer, " mov rax, {}", token.text.len())?; + writeln!(writer, " push rax")?; + writeln!(writer, " push str_{}", strings.len())?; + strings.push(token.text.clone()); + ti += 1; + } OpType::Drop => { - writeln!(writer, " ;; -- drop --")?; + writeln!(writer, " ;; -- drop")?; writeln!(writer, " pop rax")?; ti += 1; }, OpType::Print => { - writeln!(writer, " ;; -- print --")?; + writeln!(writer, " ;; -- print")?; writeln!(writer, " pop rdi")?; writeln!(writer, " call print")?; ti += 1; }, OpType::Dup => { - writeln!(writer, " ;; -- dup --")?; + writeln!(writer, " ;; -- dup")?; writeln!(writer, " pop rax")?; writeln!(writer, " push rax")?; writeln!(writer, " push rax")?; @@ -95,7 +103,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Dup2 => { - writeln!(writer, " ;; -- 2dup --")?; + writeln!(writer, " ;; -- 2dup")?; writeln!(writer, " pop rbx")?; writeln!(writer, " pop rax")?; writeln!(writer, " push rax")?; @@ -107,7 +115,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ }, OpType::Rot => { - writeln!(writer, " ;; -- rot --")?; + writeln!(writer, " ;; -- rot")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rbx")?; writeln!(writer, " pop rcx")?; @@ -118,7 +126,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Swap => { - writeln!(writer, " ;; -- swap --")?; + writeln!(writer, " ;; -- swap")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rbx")?; writeln!(writer, " push rax")?; @@ -127,7 +135,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Over => { - writeln!(writer, " ;; -- over --")?; + writeln!(writer, " ;; -- over")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rbx")?; writeln!(writer, " push rbx")?; @@ -139,12 +147,12 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ //mem OpType::Mem => { - writeln!(writer, " ;; -- mem --")?; + writeln!(writer, " ;; -- mem")?; writeln!(writer, " push mem")?; ti += 1; } OpType::Load8 => { - writeln!(writer, " ;; -- load --")?; + writeln!(writer, " ;; -- load")?; writeln!(writer, " pop rax")?; writeln!(writer, " xor rbx, rbx")?; writeln!(writer, " mov bl, [rax]")?; @@ -153,7 +161,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ } OpType::Store8 => { - writeln!(writer, " ;; -- store --")?; + writeln!(writer, " ;; -- store")?; writeln!(writer, " pop rbx")?; writeln!(writer, " pop rax")?; writeln!(writer, " mov [rax], bl")?; @@ -162,7 +170,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ // math OpType::Plus => { - writeln!(writer, " ;; -- plus --")?; + writeln!(writer, " ;; -- plus")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rbx")?; writeln!(writer, " add rax, rbx")?; @@ -170,7 +178,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Minus => { - writeln!(writer, " ;; -- minus --")?; + writeln!(writer, " ;; -- minus")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rbx")?; writeln!(writer, " sub rbx, rax")?; @@ -178,7 +186,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Equals => { - writeln!(writer, " ;; -- equals --")?; + writeln!(writer, " ;; -- equals")?; writeln!(writer, " mov rcx, 0")?; writeln!(writer, " mov rdx, 1")?; writeln!(writer, " pop rax")?; @@ -190,7 +198,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ }, OpType::Lt => { - writeln!(writer, " ;; -- lt --")?; + writeln!(writer, " ;; -- lt")?; writeln!(writer, " mov rcx, 0")?; writeln!(writer, " mov rdx, 1")?; writeln!(writer, " pop rbx")?; @@ -202,7 +210,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ }, OpType::Gt => { - writeln!(writer, " ;; -- gt --")?; + writeln!(writer, " ;; -- gt")?; writeln!(writer, " mov rcx, 0")?; writeln!(writer, " mov rdx, 1")?; writeln!(writer, " pop rbx")?; @@ -214,7 +222,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ }, OpType::Band => { - writeln!(writer, " ;; -- band --")?; + writeln!(writer, " ;; -- band")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rbx")?; writeln!(writer, " and rbx, rax")?; @@ -222,7 +230,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Bor => { - writeln!(writer, " ;; -- bor --")?; + writeln!(writer, " ;; -- bor")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rbx")?; writeln!(writer, " or rbx, rax")?; @@ -230,7 +238,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Shr => { - writeln!(writer, " ;; -- shr --")?; + writeln!(writer, " ;; -- shr")?; writeln!(writer, " pop rcx")?; writeln!(writer, " pop rbx")?; writeln!(writer, " shr rbx, cl")?; @@ -238,7 +246,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Shl => { - writeln!(writer, " ;; -- shl --")?; + writeln!(writer, " ;; -- shl")?; writeln!(writer, " pop rcx")?; writeln!(writer, " pop rbx")?; writeln!(writer, " shl rbx, cl")?; @@ -246,7 +254,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Div => { - writeln!(writer, " ;; -- div --")?; + writeln!(writer, " ;; -- div")?; writeln!(writer, " xor rdx, rdx")?; writeln!(writer, " pop rbx")?; writeln!(writer, " pop rax")?; @@ -256,7 +264,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Mul => { - writeln!(writer, " ;; -- mul --")?; + writeln!(writer, " ;; -- mul")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rbx")?; writeln!(writer, " mul rbx")?; @@ -268,44 +276,44 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ // block OpType::If => { - writeln!(writer, " ;; -- if --")?; + writeln!(writer, " ;; -- if")?; writeln!(writer, " pop rax")?; writeln!(writer, " test rax, rax")?; writeln!(writer, " jz addr_{}", token.jmp)?; ti += 1; }, OpType::Else => { - writeln!(writer, " ;; -- else --")?; + writeln!(writer, " ;; -- else")?; writeln!(writer, " jmp addr_{}", token.jmp)?; ti += 1; }, OpType::While => { - writeln!(writer, " ;; -- while --")?; + writeln!(writer, " ;; -- while")?; ti += 1; } OpType::Do => { - writeln!(writer, " ;; -- do --")?; + writeln!(writer, " ;; -- do")?; writeln!(writer, " pop rax")?; writeln!(writer, " test rax, rax")?; writeln!(writer, " jz addr_{}", token.jmp)?; ti += 1; } OpType::End => { - writeln!(writer, " ;; -- end --")?; + writeln!(writer, " ;; -- end")?; if ti + 1 != token.jmp as usize { writeln!(writer, " jmp addr_{}", token.jmp)?; } ti += 1; }, OpType::Syscall0 => { - writeln!(writer, " ;; -- syscall0 --")?; + writeln!(writer, " ;; -- syscall0")?; writeln!(writer, " pop rax")?; writeln!(writer, " syscall")?; writeln!(writer, " push rax")?; ti += 1; }, OpType::Syscall1 => { - writeln!(writer, " ;; -- syscall1 --")?; + writeln!(writer, " ;; -- syscall1")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rdi")?; writeln!(writer, " syscall")?; @@ -313,7 +321,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Syscall2 => { - writeln!(writer, " ;; -- syscall2 --")?; + writeln!(writer, " ;; -- syscall2")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rdi")?; writeln!(writer, " pop rsi")?; @@ -322,7 +330,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Syscall3 => { - writeln!(writer, " ;; -- syscall3 --")?; + writeln!(writer, " ;; -- syscall3")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rdi")?; writeln!(writer, " pop rsi")?; @@ -333,7 +341,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Syscall4 => { - writeln!(writer, " ;; -- syscall4 --")?; + writeln!(writer, " ;; -- syscall4")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rdi")?; writeln!(writer, " pop rsi")?; @@ -344,7 +352,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Syscall5 => { - writeln!(writer, " ;; -- syscall5 --")?; + writeln!(writer, " ;; -- syscall5")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rdi")?; writeln!(writer, " pop rsi")?; @@ -356,7 +364,7 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ ti += 1; }, OpType::Syscall6 => { - writeln!(writer, " ;; -- syscall6 --")?; + writeln!(writer, " ;; -- syscall6")?; writeln!(writer, " pop rax")?; writeln!(writer, " pop rdi")?; writeln!(writer, " pop rsi")?; @@ -374,6 +382,12 @@ pub fn compile(tokens: Vec, args: Args) -> Result<()>{ writeln!(writer, " mov rax, 60")?; writeln!(writer, " mov rdi, 0")?; writeln!(writer, " syscall")?; + writeln!(writer, "segment .data")?; + for s in 0..strings.len() { + let s_chars = strings[s].chars().map(|c| (c as u32).to_string()).collect::>(); + let s_list = s_chars.join(","); + writeln!(writer, " str_{}: db {} ; {}", s, s_list, strings[s].escape_default())?; + } writeln!(writer, "segment .bss")?; writeln!(writer, "mem: resb {}", crate::compile::MEM_SZ)?; diff --git a/src/compile/mod.rs b/src/compile/mod.rs index 2550149..ba135f0 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -2,3 +2,4 @@ pub mod linux_x86_64; pub mod commands; pub const MEM_SZ: u32 = 640 * 1000; // 4kb +pub const STRING_SZ: u32 = 640 * 1000; // 4kb diff --git a/src/constants.rs b/src/constants.rs index 920586c..79c2c5b 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -3,7 +3,8 @@ pub enum OpType { // stack - Push, + PushInt, + PushStr, Drop, Print, Dup, @@ -54,16 +55,20 @@ pub enum OpType { pub struct Operator { pub typ: OpType, pub value: i64, + pub text: String, //? only used for OpType::PushStr + pub addr: i64, //? only used for OpType::PushStr pub jmp: i32, pub pos: (String, u32, u32) } impl Operator { - pub fn new(typ: OpType, value: i64, file: String, row: u32, col: u32) -> Self { + pub fn new(typ: OpType, value: i64, text: String, file: String, row: u32, col: u32) -> Self { Self { typ, value, jmp: 0, + addr: -1, + text, pos: (file, row, col) } } @@ -79,10 +84,10 @@ pub struct Token { pub typ: TokenType } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub enum TokenType { Word, Int, - // String, + String, //TODO: Add char } \ No newline at end of file diff --git a/src/interpret/linux_x86_64/mod.rs b/src/interpret/linux_x86_64/mod.rs index e475fa3..6043077 100644 --- a/src/interpret/linux_x86_64/mod.rs +++ b/src/interpret/linux_x86_64/mod.rs @@ -17,7 +17,8 @@ fn stack_pop(stack: &mut Vec, pos: &(String, u32, u32)) -> Result { pub fn run(tokens: Vec) -> Result<()>{ let mut stack: Vec = Vec::new(); let mut ti = 0; - let mut mem: Vec = vec![0; crate::compile::MEM_SZ as usize]; + let mut mem: Vec = vec![0; crate::compile::MEM_SZ as usize + crate::compile::STRING_SZ as usize]; + let mut string_idx = 0; // for token in &tokens { // println!("{{typ: \"{:?}\", val: {}, jmp: {}}}", token.typ, token.value, token.jmp); @@ -29,10 +30,27 @@ pub fn run(tokens: Vec) -> Result<()>{ match token.typ { // stack - OpType::Push => { + OpType::PushInt => { stack.push(token.value as u64); ti += 1; }, + OpType::PushStr => { + if token.addr < 0 { + stack.push(token.text.len() as u64); // string len + stack.push(string_idx + crate::compile::MEM_SZ as u64); + + for c in token.text.bytes() { + mem[crate::compile::MEM_SZ as usize + string_idx as usize] = c; + string_idx += 1; + } + } else { + stack.push(token.text.len() as u64); + stack.push(token.addr as u64); + } + + + ti += 1; + }, OpType::Drop => { stack.pop(); ti += 1; @@ -249,5 +267,7 @@ pub fn run(tokens: Vec) -> Result<()>{ }, } } + + Ok(()) } \ No newline at end of file diff --git a/src/lexer.rs b/src/lexer.rs index ab65a9e..712ae64 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -2,19 +2,25 @@ use crate::constants::{Token, TokenType}; use color_eyre::Result; -fn lex_word(s: String) -> (TokenType, String) { +fn lex_word(s: String, tok_type: TokenType) -> (TokenType, String) { match s { - s if s.parse::().is_ok() => { // negative numbers not yet implemented + s if s.parse::().is_ok() && tok_type == TokenType::Word => { // negative numbers not yet implemented return (TokenType::Int, s); }, - s => { - return(TokenType::Word, s); + s if tok_type == TokenType::Word => { + return (TokenType::Word, s); + }, + s if tok_type == TokenType::String => { + return (tok_type, s); } + _ => panic!() } } -pub fn find_col(text: String, mut col: u32, predicate: F) -> Result where F: Fn(char) -> bool { - while (col as usize) < text.len() && !predicate(text.chars().nth(col as usize).unwrap()) { +pub fn find_col(text: String, mut col: u32, predicate: F) -> Result where F: Fn(char, char) -> bool { + let mut last = '\0'; + while (col as usize) < text.len() && !predicate(text.chars().nth(col as usize).unwrap(), last) { + last = text.chars().nth(col as usize).unwrap(); col += 1; } @@ -22,29 +28,59 @@ pub fn find_col(text: String, mut col: u32, predicate: F) -> Result wher } +// TODO: Implement multiline strings +fn lex_line(text: String) -> Result> { + let mut tokens: Vec<(u32, String, TokenType)> = Vec::new(); -fn lex_line(text: String) -> Result> { - let mut tokens: Vec<(u32, String)> = Vec::new(); - - let mut col = find_col(text.clone(), 0, |x| !x.is_whitespace())?; + let mut col = find_col(text.clone(), 0, |x, _| !x.is_whitespace())?; let mut col_end: u32 = 0; while col_end < text.clone().len() as u32 { - col_end = find_col(text.clone(), col, |x| x.is_whitespace())?; - let t = &text[(col as usize)..((col_end as usize))]; + if &text[(col as usize)..(col + 1) as usize] == "\"" { + col_end = find_col(text.clone(), col + 1, |x, x2| x == '"' && x2 != '\\')?; + let t = &text[((col + 1) as usize)..(col_end as usize)]; + let t = t.replace("\\n", "\n") + .replace("\\t", "\t") + .replace("\\r", "\r") + .replace("\\0", "\0"); + if !t.is_empty() { + tokens.push((col, t.to_string(), TokenType::String)); + } + col = find_col(text.clone(), col_end + 1, |x, _| !x.is_whitespace())?; - if t == "//" { - return Ok(tokens); - } + } else { - if !t.is_empty() { - tokens.push((col, t.to_string())); + col_end = find_col(text.clone(), col, |x, _| x.is_whitespace())?; + let t = &text[(col as usize)..((col_end as usize))]; + + if t == "//" { + return Ok(tokens); + } + + if !t.is_empty() { + tokens.push((col, t.to_string(), TokenType::Word)); + } + col = find_col(text.clone(), col_end, |x, _| !x.is_whitespace())?; } - col = find_col(text.clone(), col_end, |x| !x.is_whitespace())?; } Ok(tokens) } + +// fn lex_text(text: String) -> Result>{ +// let tokens: Vec = Vec::new(); + +// let mut row = 0; +// let mut col = 0; +// let mut index = find_col(text.clone(), 0, |x| x.is_whitespace())?; + +// while index < text.len() as u32 { + +// } + +// Ok(tokens) +// } + pub fn lex(code: String, file: &String) -> Result> { let lines: Vec<(usize, &str)> = code .split(['\n', '\r']) @@ -57,8 +93,8 @@ pub fn lex(code: String, file: &String) -> Result> { for (row, line) in lines { let lt = lex_line(line)?; - for (col, tok) in lt { - let (tok_type, tok) = lex_word(tok); + for (col, tok, tok_type) in lt { + let (tok_type, tok) = lex_word(tok, tok_type); let t = Token{ file: file.clone(), line: row + 1, diff --git a/src/parser.rs b/src/parser.rs index 979bc18..a8f2300 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -16,7 +16,7 @@ pub fn cross_ref(mut program: Vec) -> Result> { let if_ip = stack.pop().unwrap(); if program[if_ip as usize].typ != OpType::If { util::logger::pos_error(&op.clone().pos,"'end' can only close 'if' blocks"); - std::process::exit(1); // idc + return Err(eyre!("Bad block")); } // let mut if_og = &mut tokens[if_ip as usize]; @@ -84,11 +84,14 @@ impl Parser { match token.typ { TokenType::Word => { let word_type = lookup_word(token.text.clone(), &pos)?; - tokens.push(Operator { typ: word_type , value: 0, jmp: 0, pos: pos }); + tokens.push(Operator::new(word_type, 0, token.text.clone(), token.file.clone(), token.line, token.col)); }, TokenType::Int => {// negative numbers not yet implemented - tokens.push(Operator::new(OpType::Push, token.text.parse::()?, token.file.clone(), token.line, token.col)); + tokens.push(Operator::new(OpType::PushInt, token.text.parse::()?, String::new(), token.file.clone(), token.line, token.col)); }, + TokenType::String => { + tokens.push(Operator::new(OpType::PushStr, 0, token.text.clone(), token.file.clone(), token.line, token.col)); + } }; diff --git a/test.mcl b/test.mcl index 2fbba06..5022a1e 100644 --- a/test.mcl +++ b/test.mcl @@ -1 +1 @@ -1 2 3 2dup print print print print print \ No newline at end of file +"Hello world!\n\n\n\n\n\n" 1 1 syscall3 drop \ No newline at end of file diff --git a/tests/fail_unknown_word.mcl b/tests/fail_unknown_word.mcl new file mode 100644 index 0000000..13724a0 --- /dev/null +++ b/tests/fail_unknown_word.mcl @@ -0,0 +1 @@ +gftdesd5ryutfgyhibugtf6r4 \ No newline at end of file