mclangc/src/parser/expr.rs

462 lines
19 KiB
Rust

use std::collections::HashMap;
use anyhow::{bail, Result};
use crate::{common::loc::LocBox, debug, error, lerror, parser::{typ::parse_type, Punctuation}, tokeniser::Token};
use super::{ast::{expr::{Block, CallParams, Expr, IfBranchExpr, IfExpr, Path}, literal::Literal, TokenType}, parse_item, utils, Delimiter, Keyword};
/// Token types treated as binary operators by this module.
///
/// `parse_expr` hands control to `parse_binop` when the next token matches an
/// entry of this list, and `parse_binop` uses the same list to recognise the
/// operator it is about to consume.
// NOTE(review): no "not equal" operator appears in this list — confirm whether
// that is intentional.
const BINOP_LIST: &[TokenType] = &[
// Arithmetic
TokenType::Punct(Punctuation::Plus),
TokenType::Punct(Punctuation::Minus),
TokenType::Punct(Punctuation::Div),
TokenType::Punct(Punctuation::Star),
TokenType::Punct(Punctuation::Mod),
// Shifts
TokenType::Punct(Punctuation::Shl),
TokenType::Punct(Punctuation::Shr),
// Logical
TokenType::Punct(Punctuation::AndAnd),
TokenType::Punct(Punctuation::OrOr),
// Bitwise
TokenType::Punct(Punctuation::Ampersand),
TokenType::Punct(Punctuation::Or),
TokenType::Punct(Punctuation::Xor),
// Compound assignment
TokenType::Punct(Punctuation::AddEq),
TokenType::Punct(Punctuation::SubEq),
TokenType::Punct(Punctuation::DivEq),
TokenType::Punct(Punctuation::MulEq),
TokenType::Punct(Punctuation::ModEq),
TokenType::Punct(Punctuation::ShlEq),
TokenType::Punct(Punctuation::ShrEq),
TokenType::Punct(Punctuation::AndEq),
TokenType::Punct(Punctuation::OrEq),
TokenType::Punct(Punctuation::XorEq),
// Plain assignment
TokenType::Punct(Punctuation::Eq),
// Comparison
TokenType::Punct(Punctuation::EqEq),
TokenType::Punct(Punctuation::Lt),
TokenType::Punct(Punctuation::Gt),
TokenType::Punct(Punctuation::Le),
TokenType::Punct(Punctuation::Ge),
];
pub fn parse_expr(tokens: &mut Vec<Token>, precedence: usize, consume_semi: bool) -> Result<Option<LocBox<Expr>>> {
let res = if let Some(_) = utils::check(tokens, TokenType::Delim(Delimiter::ParenL)) {
Some(parse_group(tokens)?)
} else
if let Some(_) = utils::check(tokens, TokenType::ident("")) {
let p = parse_path(tokens)?;
if let Some(_) = utils::check(tokens, TokenType::Delim(Delimiter::CurlyL)) {
Some(parse_struct_literal(tokens, p.inner().unwrap_path())?)
} else {
Some(p)
}
} else
if let Some(_) = utils::check_from_many(tokens, &[
TokenType::Punct(Punctuation::Not),
TokenType::Punct(Punctuation::Plus),
TokenType::Punct(Punctuation::Minus),
TokenType::Punct(Punctuation::Ampersand),
TokenType::Punct(Punctuation::Star),
]) {
Some(parse_unop(tokens)?)
} else
if let Some(_) = utils::check_from_many(tokens, &[
TokenType::string("", false),
TokenType::number(0, 0, false),
TokenType::char('\0'),
TokenType::Delim(Delimiter::SquareL),
]) {
Some(parse_literal(tokens)?)
} else if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::While)) {
return Ok(Some(parse_while_loop(tokens)?));
} else if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::For)) {
return Ok(Some(parse_for_loop(tokens)?));
} else if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Loop)) {
return Ok(Some(parse_inf_loop(tokens)?));
} else if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::Return)) {
return Ok(Some(parse_return(tokens)?));
} else if let Some(kw) = utils::check_consume(tokens, TokenType::Keyword(Keyword::Break)) {
return Ok(Some(LocBox::new(kw.loc(), Expr::Break)));
} else if let Some(kw) = utils::check_consume(tokens, TokenType::Keyword(Keyword::Continue)) {
return Ok(Some(LocBox::new(kw.loc(), Expr::Continue)));
} else if let Some(kw) = utils::check(tokens, TokenType::Keyword(Keyword::If)) {
return Ok(Some(LocBox::new(&kw.loc().clone(), Expr::If(parse_if(tokens)?))));
} else {
None
};
if let Some(mut res) = res {
if utils::check(tokens, TokenType::Punct(Punctuation::Fieldaccess)).is_some() {
res = parse_field_access(tokens, res)?;
}
if utils::check(tokens, TokenType::Punct(Punctuation::Arrow)).is_some() {
res =parse_ptr_field_access(tokens, res)?;
}
if utils::check(tokens, TokenType::Delim(Delimiter::ParenL)).is_some() {
res = parse_fn_call(tokens, res)?;
}
if utils::check(tokens, TokenType::Keyword(Keyword::As)).is_some() {
res = parse_cast(tokens, res)?;
}
if utils::check(tokens, TokenType::Delim(Delimiter::SquareL)).is_some() {
res = parse_array_index(tokens, res)?;
}
if let Some(_) = utils::check_from_many(tokens, BINOP_LIST) {
return Ok(Some(parse_binop(tokens, res, precedence)?));
} else {
if consume_semi {
_ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Semi), "Expected ; at the end of the expression")?;
}
return Ok(Some(res));
}
}
Ok(res)
}
/// Parses a `return` keyword followed by an optional value expression.
///
/// The value is stored as an `Option`, so a `return` without an expression
/// yields `Expr::Return` wrapping `None`. The node carries the location of
/// the `return` keyword.
fn parse_return(tokens: &mut Vec<Token>) -> Result<LocBox<Expr>> {
    let return_kw = utils::check_consume_or_err(tokens, TokenType::Keyword(Keyword::Return), "")?;
    // `consume_semi = true`: the value parser is asked to take the trailing `;` too.
    let value = Box::new(parse_expr(tokens, 0, true)?);
    Ok(LocBox::new(return_kw.loc(), Expr::Return(value)))
}
/// Parses the `as <type>` suffix that follows `left` and wraps both in an
/// `Expr::Cast` located at the `as` keyword.
fn parse_cast(tokens: &mut Vec<Token>, left: LocBox<Expr>) -> Result<LocBox<Expr>> {
    let as_kw = utils::check_consume_or_err(tokens, TokenType::Keyword(Keyword::As), "")?;
    let target = Box::new(parse_type(tokens)?);
    let cast = Expr::Cast {
        left: Box::new(left),
        right: target,
    };
    Ok(LocBox::new(as_kw.loc(), cast))
}
fn parse_if(tokens: &mut Vec<Token>) -> Result<IfExpr> {
let loc = utils::check_consume_or_err(tokens, TokenType::Keyword(Keyword::If), "")?;
let Some(test) = parse_expr(tokens, 0, false)? else {
lerror!(loc.loc(), "Expected test for if statement, got nothing");
bail!("")
};
let block = if let Some(_) = utils::check(tokens, TokenType::Delim(Delimiter::CurlyL)) {
if let Some(_) = utils::check_2_last(tokens, TokenType::Delim(Delimiter::CurlyR)) {
_ = utils::check_consume(tokens, TokenType::Delim(Delimiter::CurlyR));
Block(Vec::new())
} else {
parse_block(tokens)?
}
} else {
lerror!(loc.loc(), "Expected '{{'");
bail!("")
};
if let Some(_) = utils::check_consume(tokens, TokenType::Keyword(Keyword::Else)) {
if let Some(_) = utils::check(tokens, TokenType::Keyword(Keyword::If)) {
let branch = IfBranchExpr::ElseIf(Box::new(parse_if(tokens)?));
Ok(IfExpr {
test: Box::new(test),
body: block,
else_if: Some(branch)
})
} else {
let branch = IfBranchExpr::Else(parse_block(tokens)?);
Ok(IfExpr {
test: Box::new(test),
body: block,
else_if: Some(branch)
})
}
} else {
Ok(IfExpr {
test: Box::new(test),
body: block,
else_if: None
})
}
}
/// Parses `while <test> { ... }` into an `Expr::WhileLoop` located at the
/// `while` keyword.
///
/// # Errors
/// Fails when the keyword is missing, when no test expression follows it, or
/// when the body block cannot be parsed.
fn parse_while_loop(tokens: &mut Vec<Token>) -> Result<LocBox<Expr>> {
    let while_kw = utils::check_consume_or_err(tokens, TokenType::Keyword(Keyword::While), "")?;
    let condition = match parse_expr(tokens, 0, false)? {
        Some(expr) => expr,
        None => {
            lerror!(while_kw.loc(), "Expected test comparrison for while loop, got nothing");
            bail!("")
        }
    };
    let body = parse_block(tokens)?;
    let node = Expr::WhileLoop {
        test: Box::new(condition),
        body,
    };
    Ok(LocBox::new(while_kw.loc(), node))
}
/// Parses `for <init> <test>; <post> { ... }` into an `Expr::ForLoop` located
/// at the `for` keyword.
///
/// The init part is parsed as an item, the test and post parts as
/// expressions.
///
/// # Errors
/// Fails when the keyword or any of the three header parts is missing, when
/// the `;` after the test expression is missing, or when the body block
/// cannot be parsed.
fn parse_for_loop(tokens: &mut Vec<Token>) -> Result<LocBox<Expr>> {
    let kw = utils::check_consume_or_err(tokens, TokenType::Keyword(Keyword::For), "")?;
    let Some(pre) = parse_item(tokens)? else {
        lerror!(kw.loc(), "Expected init stat for a for loop, got nothing");
        bail!("")
    };
    // Semicolon parsed out by parse_item above
    let Some(test) = parse_expr(tokens, 0, false)? else {
        lerror!(kw.loc(), "Expected test comparrison for a for loop, got nothing");
        bail!("")
    };
    // The separator is mandatory: propagate the failure instead of dropping it.
    utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Semi), "Expected ; after the test expression of a for loop")?;
    let Some(post) = parse_expr(tokens, 0, false)? else {
        lerror!(kw.loc(), "Expected post expression (usually an index increment) for a for loop, got nothing");
        bail!("")
    };
    let block = parse_block(tokens)?;
    Ok(LocBox::new(kw.loc(), Expr::ForLoop {
        init: Box::new(pre),
        test: Box::new(test),
        on_loop: Box::new(post),
        body: block,
    }))
}
/// Parses `loop { ... }` into an `Expr::InfLoop` located at the `loop`
/// keyword.
fn parse_inf_loop(tokens: &mut Vec<Token>) -> Result<LocBox<Expr>> {
    let loop_kw = utils::check_consume_or_err(tokens, TokenType::Keyword(Keyword::Loop), "")?;
    let body = parse_block(tokens)?;
    let node = Expr::InfLoop { body };
    Ok(LocBox::new(loop_kw.loc(), node))
}
/// Parses the argument list `( expr, expr, ... )` that follows `left` and
/// builds an `Expr::Call` located at the opening parenthesis.
///
/// A trailing comma before `)` is accepted.
///
/// # Errors
/// Fails when an argument is followed by neither `,` nor `)`, when the
/// closing `)` is missing, or when an argument expression fails to parse.
fn parse_fn_call(tokens: &mut Vec<Token>, left: LocBox<Expr>) -> Result<LocBox<Expr>> {
    let start = utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::ParenL), "")?;
    let mut params = Vec::new();
    while !tokens.is_empty() {
        if utils::check(tokens, TokenType::Delim(Delimiter::ParenR)).is_some() {
            break;
        }
        let Some(param) = parse_expr(tokens, 0, false)? else { break };
        params.push(param);
        // After an argument only `,` (consumed here) or `)` (left for the
        // check below) may follow.
        if utils::check_consume(tokens, TokenType::Punct(Punctuation::Comma)).is_none()
            && utils::check(tokens, TokenType::Delim(Delimiter::ParenR)).is_none()
        {
            lerror!(&utils::get_last_loc(), "Expected ',' or ')' but didnt find either");
            bail!("")
        }
    }
    // The closing parenthesis is mandatory: propagate the failure instead of
    // dropping it.
    utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::ParenR), "Expected ')' at the end of the call arguments")?;
    Ok(LocBox::new(start.loc(), Expr::Call { path: Box::new(left), params: CallParams(params) }))
}
/// Parses the `[ index ]` suffix that follows `left` and builds an
/// `Expr::ArrayIndex` located at the opening bracket.
///
/// # Errors
/// Fails when no index expression follows `[`, when the closing `]` is
/// missing, or when the index expression fails to parse.
fn parse_array_index(tokens: &mut Vec<Token>, left: LocBox<Expr>) -> Result<LocBox<Expr>> {
    let start = utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::SquareL), "")?;
    let Some(idx) = parse_expr(tokens, 0, false)? else {
        lerror!(start.loc(), "Expected index for in array index but found nothing.");
        bail!("")
    };
    // The closing bracket is mandatory: propagate the failure instead of
    // dropping it.
    utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::SquareR), "Expected ']' after the array index")?;
    Ok(LocBox::new(start.loc(), Expr::ArrayIndex {
        name: Box::new(left),
        index: Box::new(idx),
    }))
}
/// Parses the `.` access that follows `left` and builds an
/// `Expr::FieldAccess` located at the `.` token.
///
/// When the accessed name is itself followed by `->` or `.`, the rest of the
/// chain is parsed recursively and becomes the right-hand side, so chains
/// nest to the right.
fn parse_field_access(tokens: &mut Vec<Token>, left: LocBox<Expr>) -> Result<LocBox<Expr>> {
    let dot = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Fieldaccess), "unreachable")?;

    // Decide how the chain continues before the name is consumed.
    // `check_2_last` presumably inspects the token after the upcoming one.
    let continues_with_arrow = utils::check_2_last(tokens, TokenType::Punct(Punctuation::Arrow)).is_some();
    let continues_with_dot = !continues_with_arrow
        && utils::check_2_last(tokens, TokenType::Punct(Punctuation::Fieldaccess)).is_some();

    let member = parse_path(tokens)?;
    let rhs = if continues_with_arrow {
        parse_ptr_field_access(tokens, member)?
    } else if continues_with_dot {
        parse_field_access(tokens, member)?
    } else {
        member
    };

    let node = Expr::FieldAccess {
        left: Box::new(Some(left)),
        right: Box::new(rhs),
    };
    Ok(LocBox::new(dot.loc(), node))
}
/// Parses the `->` access that follows `left` and builds an
/// `Expr::PtrFieldAccess` located at the `->` token.
///
/// When the accessed name is itself followed by `->` or `.`, the rest of the
/// chain is parsed recursively and becomes the right-hand side, so chains
/// nest to the right.
fn parse_ptr_field_access(tokens: &mut Vec<Token>, left: LocBox<Expr>) -> Result<LocBox<Expr>> {
    let arrow = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Arrow), "unreachable")?;

    // Decide how the chain continues before the name is consumed.
    // `check_2_last` presumably inspects the token after the upcoming one.
    let continues_with_arrow = utils::check_2_last(tokens, TokenType::Punct(Punctuation::Arrow)).is_some();
    let continues_with_dot = !continues_with_arrow
        && utils::check_2_last(tokens, TokenType::Punct(Punctuation::Fieldaccess)).is_some();

    let member = parse_path(tokens)?;
    let rhs = if continues_with_arrow {
        parse_ptr_field_access(tokens, member)?
    } else if continues_with_dot {
        parse_field_access(tokens, member)?
    } else {
        member
    };

    let node = Expr::PtrFieldAccess {
        left: Box::new(Some(left)),
        right: Box::new(rhs),
    };
    Ok(LocBox::new(arrow.loc(), node))
}
fn parse_literal(tokens: &mut Vec<Token>) -> Result<LocBox<Expr>> {
if let Some(tkn) = utils::check_consume(tokens, TokenType::string("", false)) {
let TokenType::String(str) = tkn.tt() else {unreachable!()};
return Ok(LocBox::new(tkn.loc(), Expr::Literal(Literal::String(str.clone()))));
} else
if let Some(tkn) = utils::check_consume(tokens, TokenType::number(0, 0, false)) {
let TokenType::Number(val) = tkn.tt() else {unreachable!()};
return Ok(LocBox::new(tkn.loc(), Expr::Literal(Literal::Number(val.clone()))));
} else
if let Some(tkn) = utils::check_consume(tokens, TokenType::char('\0')) {
let TokenType::Char(val) = tkn.tt() else {unreachable!()};
return Ok(LocBox::new(tkn.loc(), Expr::Literal(Literal::Char(val.clone()))));
} else
if let Some(start) = utils::check_consume(tokens, TokenType::Delim(Delimiter::SquareL)) {
if let Some(_) = utils::check_consume(tokens, TokenType::Delim(Delimiter::SquareR)) {
return Ok(LocBox::new(start.loc(), Expr::Literal(Literal::Array(Vec::new()))));
}
if *tokens[tokens.len()-2].tt() == TokenType::Punct(Punctuation::Comma) {
let first = parse_expr(tokens, 0, false)?;
let Some(first) = first else { unreachable!() };
let mut values = Vec::new();
values.push(first);
while !tokens.is_empty() {
let Some(val) = parse_expr(tokens, 0, false)? else{break};
values.push(val);
if let None = utils::check_consume(tokens, TokenType::Punct(Punctuation::Comma)) {
break;
}
}
utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::SquareR), "")?;
return Ok(LocBox::new(start.loc(), Expr::Literal(Literal::Array(values))));
} else if *tokens[tokens.len()-2].tt() == TokenType::Punct(Punctuation::Semi) {
let typ = parse_type(tokens)?;
let count = parse_expr(tokens, 0, false)?.unwrap();
utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::SquareR), "")?;
return Ok(LocBox::new(start.loc(), Expr::Literal(Literal::ArrayRepeat {
typ: Box::new(typ),
count: Box::new(count)
})));
} else {
if let Some(curr) = tokens.last() {
lerror!(start.loc(), "Expected a , or ; as a separator in a literal array (normal, or repeating, respectively), but found {}", curr.tt());
} else {
lerror!(start.loc(), "Expected a , or ; as a separator in a literal array (normal, or repeating, respectively), but found nothing");
}
bail!("")
}
}
unreachable!()
}
fn parse_struct_literal(tokens: &mut Vec<Token>, name: Path) -> Result<LocBox<Expr>> {
let start = utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::CurlyL), "")?;
let mut fields = HashMap::new();
while !tokens.is_empty() {
if let Some(_) = utils::check_consume(tokens, TokenType::Delim(Delimiter::CurlyR)) {
break;
}
let name = utils::check_consume_or_err(tokens, TokenType::ident(""), "")?;
_ = utils::check_consume_or_err(tokens, TokenType::Punct(Punctuation::Colon), "")?;
let typ = parse_expr(tokens, 0, false)?.unwrap();
fields.insert(name.tt().unwrap_ident(), typ);
if let None = utils::check_consume(tokens, TokenType::Punct(Punctuation::Comma)) {
utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::CurlyR), "")?;
break;
}
}
Ok(LocBox::new(start.loc(), Expr::Struct { path: name, fields }))
}
fn parse_group(tokens: &mut Vec<Token>) -> Result<LocBox<Expr>> {
let start = utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::ParenL), "")?;
let Some(expr) = parse_expr(tokens, 0, false)? else {
lerror!(start.loc(), "Expected expr found nothing");
bail!("")
};
utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::ParenR), "")?;
Ok(LocBox::new(start.loc(), Expr::Group(Box::new(expr))))
}
/// Parses a path: an identifier followed by any number of
/// `<path separator> identifier` segments, returned as an `Expr::Path`
/// located at the first identifier.
///
/// # Errors
/// Fails when the next token is not an identifier. This function is also
/// called right after a `.` or `->` has been consumed, where arbitrary input
/// may follow, so this is reported as a parse error rather than a panic.
// NOTE(review): a path separator that is not followed by an identifier is
// consumed and the path simply ends there — confirm whether a dangling
// separator should be an error.
fn parse_path(tokens: &mut Vec<Token>) -> Result<LocBox<Expr>> {
    let first = utils::check_consume_or_err(tokens, TokenType::ident(""), "Expected an identifier")?;
    let mut segments = vec![first.tt().unwrap_ident()];
    while utils::check_consume(tokens, TokenType::Punct(Punctuation::Pathaccess)).is_some() {
        let Some(segment) = utils::check_consume(tokens, TokenType::ident("")) else {
            break;
        };
        segments.push(segment.tt().unwrap_ident());
    }
    Ok(LocBox::new(first.loc(), Expr::Path(Path(segments))))
}
fn parse_unop(tokens: &mut Vec<Token>) -> Result<LocBox<Expr>> {
let typ = utils::check_consume_or_err_from_many(tokens, &[
TokenType::Punct(Punctuation::Not),
TokenType::Punct(Punctuation::Plus),
TokenType::Punct(Punctuation::Minus),
TokenType::Punct(Punctuation::Ampersand),
TokenType::Punct(Punctuation::Star),
], "")?;
let loc = typ.loc().clone();
let TokenType::Punct(typ) = typ.tt().clone() else {unreachable!()};
let Some(right) = parse_expr(tokens, 5, false)? else {
lerror!(&loc, "Expected expression after unary token, found nothing");
bail!("")
};
Ok(LocBox::new(&loc, Expr::UnOp {
typ,
right: Box::new(right)
}))
}
fn parse_binop(tokens: &mut Vec<Token>, mut lhs: LocBox<Expr>, precedence: usize) -> Result<LocBox<Expr>> {
// TODO: https://en.wikipedia.org/wiki/Operator-precedence_parser#Pseudocode
loop {
let op_loc;
let op = match tokens.last() {
Some(op) if BINOP_LIST.contains(&op.tt()) => {
op_loc = op.loc().clone();
let TokenType::Punct(op) = op.tt() else {unreachable!()};
op.clone()
}
Some(op) if [
TokenType::Delim(Delimiter::ParenR),
TokenType::Punct(Punctuation::Semi)
].contains(&op.tt()) => {
break
}
Some(op) if matches!(&op.tt(), TokenType::Ident(_)) => {
lerror!(op.loc(), "Unexpected identifier, did you forget a semicolon? ';'");
bail!("");
}
Some(_) |
None => break,
};
debug!("OP: {op:?}");
let (lp, rp) = op.precedence().unwrap();
if lp < precedence {
break
}
_ = tokens.pop();
let Some(rhs) = parse_expr(tokens, rp, false)? else {break;};
lhs = LocBox::new(&op_loc, Expr::BinOp {
typ: op,
left: Box::new(lhs),
right: Box::new(rhs)
});
}
Ok(lhs)
}
/// Parses a brace-delimited block `{ item* }` into a `Block`.
///
/// Items are read with `parse_item` until a `}` is next, the tokens run out,
/// or `parse_item` yields nothing; the closing `}` is then required.
///
/// # Errors
/// Fails when either brace is missing or when an item fails to parse.
pub fn parse_block(tokens: &mut Vec<Token>) -> Result<Block> {
    utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::CurlyL), "")?;
    let mut body = Vec::new();
    loop {
        let at_end = tokens.is_empty()
            || utils::check(tokens, TokenType::Delim(Delimiter::CurlyR)).is_some();
        if at_end {
            break;
        }
        match parse_item(tokens)? {
            Some(item) => body.push(item),
            None => break,
        }
    }
    utils::check_consume_or_err(tokens, TokenType::Delim(Delimiter::CurlyR), "")?;
    Ok(Block(body))
}