From fed3be5614a809a3d5f0313b5554be7e196a4d87 Mon Sep 17 00:00:00 2001 From: MCorange Date: Sat, 18 Mar 2023 20:21:45 +0200 Subject: [PATCH] introduced notion of a token --- Cargo.toml | 1 - src/constants.rs | 11 ++- src/interpret/linux_x86_64/mod.rs | 2 +- src/lexer.rs | 15 +++- src/parser.rs | 130 ++++++++++++++++-------------- src/util.rs | 10 ++- 6 files changed, 101 insertions(+), 68 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f806427..dde79dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,6 @@ name = "mclang" version = "0.1.0" edition = "2021" - # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] diff --git a/src/constants.rs b/src/constants.rs index daed688..920586c 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -75,5 +75,14 @@ pub struct Token { pub file: String, pub line: u32, pub col: u32, - pub text: String + pub text: String, + pub typ: TokenType +} + +#[derive(Debug, Clone)] +pub enum TokenType { + Word, + Int, + // String, + //TODO: Add char } \ No newline at end of file diff --git a/src/interpret/linux_x86_64/mod.rs b/src/interpret/linux_x86_64/mod.rs index 19734f1..e475fa3 100644 --- a/src/interpret/linux_x86_64/mod.rs +++ b/src/interpret/linux_x86_64/mod.rs @@ -8,7 +8,7 @@ fn stack_pop(stack: &mut Vec, pos: &(String, u32, u32)) -> Result { match stack.pop() { Some(i) => Ok(i), None => { - util::logger::pos_error(pos.clone(), "Stack underflow"); + util::logger::pos_error(&pos.clone(), "Stack underflow"); Err(eyre!("Stack underflow")) }, } diff --git a/src/lexer.rs b/src/lexer.rs index ba9b1e9..ab65a9e 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,8 +1,17 @@ -use crate::constants::Token; +use crate::constants::{Token, TokenType}; use color_eyre::Result; - +fn lex_word(s: String) -> (TokenType, String) { + match s { + s if s.parse::().is_ok() => { // negative numbers not yet implemented + return (TokenType::Int, s); + }, + s => { + return(TokenType::Word, s); + } + } +} pub fn find_col(text: String, mut col: u32, predicate: F) -> Result where F: Fn(char) -> bool { while (col as usize) < text.len() && !predicate(text.chars().nth(col as usize).unwrap()) { @@ -49,11 +58,13 @@ pub fn lex(code: String, file: &String) -> Result> { for (row, line) in lines { let lt = lex_line(line)?; for (col, tok) in lt { + let (tok_type, tok) = lex_word(tok); let t = Token{ file: file.clone(), line: row + 1, col: col, text: tok, + typ: tok_type }; tokens.push(t); } diff --git a/src/parser.rs b/src/parser.rs index 51603fe..979bc18 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,4 +1,6 @@ -use crate::{constants::{Operator, OpType, Token}, util}; +use std::{collections::HashMap, ops::Deref}; + +use crate::{constants::{Operator, OpType, Token, TokenType}, util}; use color_eyre::Result; use eyre::eyre; @@ -13,7 +15,7 @@ pub fn cross_ref(mut program: Vec) -> Result> { OpType::Else => { let if_ip = stack.pop().unwrap(); if program[if_ip as usize].typ != OpType::If { - util::logger::pos_error(op.clone().pos,"'end' can only close 'if' blocks"); + util::logger::pos_error(&op.clone().pos,"'end' can only close 'if' blocks"); std::process::exit(1); // idc } @@ -35,7 +37,7 @@ pub fn cross_ref(mut program: Vec) -> Result> { program[ip].jmp = program[block_ip as usize].jmp; program[block_ip as usize].jmp = (ip + 1) as i32; } else { - util::logger::pos_error(op.clone().pos,"'end' can only close 'if' blocks"); + util::logger::pos_error(&op.clone().pos,"'end' can only close 'if' blocks"); std::process::exit(1); // idc } @@ -53,7 +55,7 @@ pub fn cross_ref(mut program: Vec) -> Result> { } if stack.len() > 0 { - util::logger::pos_error(program[stack.pop().expect("Empy stack") as usize].clone().pos,"Unclosed block"); + util::logger::pos_error(&program[stack.pop().expect("Empy stack") as usize].clone().pos,"Unclosed block"); return Err(eyre!("Unclosed block")); } @@ -79,65 +81,75 @@ impl Parser { continue; } let pos = (token.file.clone(), token.line, token.col); - match token.text.as_str() { - t if t.parse::().is_ok() => { // negative numbers not yet implemented - let num = t.parse::().unwrap(); - tokens.push(Operator::new(OpType::Push, num, token.file.clone(), token.line, token.col)); + match token.typ { + TokenType::Word => { + let word_type = lookup_word(token.text.clone(), &pos)?; + tokens.push(Operator { typ: word_type , value: 0, jmp: 0, pos: pos }); }, - - "print" => tokens.push(Operator::new(OpType::Print, 0, token.file.clone(), token.line, token.col)), - - // stack - "dup" => tokens.push(Operator::new(OpType::Dup, 0, token.file.clone(), token.line, token.col)), - "drop" => tokens.push(Operator::new(OpType::Drop, 0, token.file.clone(), token.line, token.col)), - "2dup" => tokens.push(Operator::new(OpType::Dup2, 0, token.file.clone(), token.line, token.col)), - "rot" => tokens.push(Operator::new(OpType::Rot, 0, token.file.clone(), token.line, token.col)), - "over" => tokens.push(Operator::new(OpType::Over, 0, token.file.clone(), token.line, token.col)), - "swap" => tokens.push(Operator::new(OpType::Swap, 0, token.file.clone(), token.line, token.col)), + TokenType::Int => {// negative numbers not yet implemented + tokens.push(Operator::new(OpType::Push, token.text.parse::()?, token.file.clone(), token.line, token.col)); + }, + }; - // comp and math - "+" => tokens.push(Operator::new(OpType::Plus, 0, token.file.clone(), token.line, token.col)), - "-" => tokens.push(Operator::new(OpType::Minus, 0, token.file.clone(), token.line, token.col)), - "=" => tokens.push(Operator::new(OpType::Equals, 0, token.file.clone(), token.line, token.col)), - ">" => tokens.push(Operator::new(OpType::Gt, 0, token.file.clone(), token.line, token.col)), - "<" => tokens.push(Operator::new(OpType::Lt, 0, token.file.clone(), token.line, token.col)), - "band" => tokens.push(Operator::new(OpType::Band, 0, token.file.clone(), token.line, token.col)), - "bor" => tokens.push(Operator::new(OpType::Bor, 0, token.file.clone(), token.line, token.col)), - "shr" => tokens.push(Operator::new(OpType::Shr, 0, token.file.clone(), token.line, token.col)), - "shl" => tokens.push(Operator::new(OpType::Shl, 0, token.file.clone(), token.line, token.col)), - "/" => tokens.push(Operator::new(OpType::Div, 0, token.file.clone(), token.line, token.col)), - "*" => tokens.push(Operator::new(OpType::Mul, 0, token.file.clone(), token.line, token.col)), - - // block - "if" => tokens.push(Operator::new(OpType::If, 0, token.file.clone(), token.line, token.col)), - "else" => tokens.push(Operator::new(OpType::Else, 0, token.file.clone(), token.line, token.col)), - "end" => tokens.push(Operator::new(OpType::End, 0, token.file.clone(), token.line, token.col)), - "while" => tokens.push(Operator::new(OpType::While, 0, token.file.clone(), token.line, token.col)), - "do" => tokens.push(Operator::new(OpType::Do, 0, token.file.clone(), token.line, token.col)), - - // mem - "mem" => tokens.push(Operator::new(OpType::Mem, 0, token.file.clone(), token.line, token.col)), - "!8" => tokens.push(Operator::new(OpType::Load8, 0, token.file.clone(), token.line, token.col)), - "@8" => tokens.push(Operator::new(OpType::Store8, 0, token.file.clone(), token.line, token.col)), - - "syscall0" => tokens.push(Operator::new(OpType::Syscall0, 0, token.file.clone(), token.line, token.col)), - "syscall1" => tokens.push(Operator::new(OpType::Syscall1, 0, token.file.clone(), token.line, token.col)), - "syscall2" => tokens.push(Operator::new(OpType::Syscall2, 0, token.file.clone(), token.line, token.col)), - "syscall3" => tokens.push(Operator::new(OpType::Syscall3, 0, token.file.clone(), token.line, token.col)), - "syscall4" => tokens.push(Operator::new(OpType::Syscall4, 0, token.file.clone(), token.line, token.col)), - "syscall5" => tokens.push(Operator::new(OpType::Syscall5, 0, token.file.clone(), token.line, token.col)), - "syscall6" => tokens.push(Operator::new(OpType::Syscall6, 0, token.file.clone(), token.line, token.col)), - - - - - t => { - util::logger::pos_error(pos, format!("Unknown token '{}'", t).as_str()); - return Err(eyre!("Unknown token")); - } - } + + //"print" => tokens.push(Operator::new(OpType::Print, 0, token.file.clone(), token.line, token.col)), } Ok(cross_ref(tokens)?) } +} + + +fn lookup_word>(s: String, pos: P) -> Result{ + let lookup_table: HashMap<&str, OpType> = HashMap::from([ + //stack + ("print", OpType::Print), + ("dup", OpType::Dup), + ("drop", OpType::Drop), + ("2dup", OpType::Dup2), + ("rot", OpType::Rot), + ("over", OpType::Over), + ("swap", OpType::Swap), + + // comp and math + ("+", OpType::Plus), + ("-", OpType::Minus), + ("=", OpType::Equals), + (">", OpType::Gt), + ("<", OpType::Lt), + ("band", OpType::Band), + ("bor", OpType::Bor), + ("shr", OpType::Shr), + ("shl", OpType::Shl), + ("/", OpType::Div), + ("*", OpType::Mul), + + // block + ("if", OpType::If), + ("else", OpType::Else), + ("end", OpType::End), + ("while", OpType::While), + ("do", OpType::Do), + + // mem + ("mem", OpType::Mem), + ("!8", OpType::Load8), + ("@8", OpType::Store8), + + ("syscall0", OpType::Syscall0), + ("syscall1", OpType::Syscall1), + ("syscall2", OpType::Syscall2), + ("syscall3", OpType::Syscall3), + ("syscall4", OpType::Syscall4), + ("syscall5", OpType::Syscall5), + ("syscall6", OpType::Syscall6), + ]); + + match lookup_table.get(s.as_str()) { + Some(v) => Ok(v.clone()), + None => { + util::logger::pos_error(pos, format!("Unknown word '{}'", s).as_str()); + return Err(eyre!("Unknown word")) + } + } } \ No newline at end of file diff --git a/src/util.rs b/src/util.rs index 5dc832a..9aa6d43 100644 --- a/src/util.rs +++ b/src/util.rs @@ -30,6 +30,8 @@ pub mod color { pub mod logger { #![allow(dead_code)] + use std::ops::Deref; + use crate::util::color; pub fn error(msg: &str) { @@ -49,19 +51,19 @@ pub mod logger { } - pub fn pos_error(pos: (String, u32, u32), msg: &str) { + pub fn pos_error>(pos: P, msg: &str) { println!("{f}:{r}:{c} {red}error{rs}: {msg}", red=color::FG_RED, rs=color::RESET, f=pos.0, r=pos.1, c=pos.2); } - pub fn pos_warn(pos: (String, u32, u32), msg: &str) { + pub fn pos_warn>(pos: P, msg: &str) { println!("{f}:{r}:{c} {yellow}warn{rs}: {msg}", yellow=color::FG_YELLOW, rs=color::RESET, f=pos.0, r=pos.1, c=pos.2); } - pub fn pos_info(pos: (String, u32, u32), msg: &str) { + pub fn pos_info>(pos: P, msg: &str) { println!("{f}:{r}:{c} {green}info{rs}: {msg}", green=color::FG_GREEN, rs=color::RESET, f=pos.0, r=pos.1, c=pos.2); } - pub fn pos_note(pos: (String, u32, u32), msg: &str) { + pub fn pos_note>(pos: P, msg: &str) { println!("{f}:{r}:{c} {blue}note{rs}: {msg}", blue=color::FG_BLUE, rs=color::RESET, f=pos.0, r=pos.1, c=pos.2); }