diff --git a/a.s b/a.s index eef4bfc..e69de29 100644 --- a/a.s +++ b/a.s @@ -1,30 +0,0 @@ -bits 64 -section .text -global _start -_start: - call morph_f_main - mov rax, 60 - mov rdi, 0 - syscall -morph_f_main: - ; -- PUSH_INT -- - mov rax, 34 - push rax - ; -- PUSH_INT -- - mov rax, 35 - push rax - ; -- OP_ADD -- - pop rax - pop rbx - add rax, rbx - push rax - ; -- PUSH_STR -- - mov rax, morph_str_0 - push rax - mov rax, 10 - push rax - ret -section .bss -section .rodata -morph_str_0: db "hewo world" -morph_const_0: dq 1 diff --git a/src/compiler/targets/x86_64-linux-nasm.c b/src/compiler/targets/x86_64-linux-nasm.c index 610342f..2405840 100644 --- a/src/compiler/targets/x86_64-linux-nasm.c +++ b/src/compiler/targets/x86_64-linux-nasm.c @@ -1,6 +1,7 @@ #include "argparse.h" #include "dynarray.h" +#include "logger.h" #include "parser/ast.h" #include "token.h" #include @@ -8,9 +9,13 @@ typedef struct comp_state_s { char** strings; + size_t if_id; + size_t while_id; } comp_state_t; -int write_op(ast_op_t* aop, FILE* f, comp_state_t state) { +int write_if_stat(ast_if_stat_t* if_stat, FILE* f, comp_state_t* state); + +int write_op(ast_op_t* aop, FILE* f, comp_state_t* state) { switch (aop->type) { case (AOT_OP): { @@ -23,17 +28,17 @@ int write_op(ast_op_t* aop, FILE* f, comp_state_t state) { }; break; case (TT_PUSH_STR): { fprintf(f, " ; -- PUSH_STR --\n"); - fprintf(f, " mov rax, morph_str_%zu\n", dynarray_length(state.strings)); + fprintf(f, " mov rax, morph_str_%zu\n", dynarray_length(state->strings)); fprintf(f, " push rax\n"); fprintf(f, " mov rax, %zu\n", strlen(op.str_v)); fprintf(f, " push rax\n"); - dynarray_push(state.strings, op.str_v); + dynarray_push(state->strings, op.str_v); }; break; case (TT_PUSH_CSTR): { fprintf(f, " ; -- PUSH_CSTR --\n"); - fprintf(f, " mov rax, morph_str_%zu\n", dynarray_length(state.strings)); + fprintf(f, " mov rax, morph_str_%zu\n", dynarray_length(state->strings)); fprintf(f, " push rax\n"); - dynarray_push(state.strings, 
op.str_v); + dynarray_push(state->strings, op.str_v); }; break; case (TT_PUSH_BOOL): { fprintf(f, " ; -- PUSH_BOOL --\n"); @@ -51,6 +56,13 @@ int write_op(ast_op_t* aop, FILE* f, comp_state_t state) { }; break; case (TT_OP): { switch (op.op_type) { + + case (OP_CAST_BOOL): + case (OP_CAST_INT): + case (OP_CAST_PTR): + case (OP_COUNT__): + case (OP_NONE): + break; case (OP_ADD): { fprintf(f, " ; -- OP_ADD --\n"); fprintf(f, " pop rax\n"); @@ -366,11 +378,11 @@ int write_op(ast_op_t* aop, FILE* f, comp_state_t state) { case (OP_HERE): { char* pos = malloc(1024 * 2); snprintf(pos, 2048, "%s:%d:%d", op.loc.file, op.loc.line, op.loc.col); - fprintf(f, " mov rax, str_%zu\n", dynarray_length(state.strings)); + fprintf(f, " mov rax, str_%zu\n", dynarray_length(state->strings)); fprintf(f, " push rax\n"); fprintf(f, " mov rax, %zu\n", strlen(op.str_v)); fprintf(f, " push rax\n"); - dynarray_push(state.strings, op.str_v); + dynarray_push(state->strings, op.str_v); }; break; case (OP_PRINT): { }; break; @@ -378,17 +390,42 @@ int write_op(ast_op_t* aop, FILE* f, comp_state_t state) { }; break; case (TT_KW): case (TT_IDENT): { + log_warn(&op.loc, "Found a KW or IDENT where it shouldnt exists (compiler): %s", get_tok_str_dbg(&op)); // unreachable } break; } } case (AOT_IF): { + write_if_stat(&aop->if_stat, f, state); }; break; case (AOT_WHILE): { + size_t id = state->while_id++; + fprintf(f, " ;; -- OP_WHILE -- \n"); + fprintf(f, "morph_while_%zu_cond:\n", id); + for (int i = 0; i < dynarray_length(aop->while_stat.condition); i++) { + write_op(&aop->while_stat.condition[i], f, state); + } + fprintf(f, " pop rax\n"); + fprintf(f, " test rax, rax\n"); + fprintf(f, " jz morph_while_%zu_end\n", id); + fprintf(f, " pop rax\n"); + fprintf(f, "morph_while_%zu_start:\n", id); + for (int i = 0; i < dynarray_length(aop->while_stat.body); i++) { + write_op(&aop->while_stat.body[i], f, state); + } + fprintf(f, " jmp morph_while_%zu_cond\n", id); /* back-edge: the jump operand is the bare label name, no trailing colon */ + fprintf(f, "morph_while_%zu_end:\n", id); 
}; break; case (AOT_USE_CONST): { + + fprintf(f, " ;; -- OP_USE_CONST -- \n"); + fprintf(f, " mov rax, [morph_const_%zu]\n", aop->id); + fprintf(f, " push rax\n"); }; break; case (AOT_USE_MEMORY): { + fprintf(f, " ;; -- OP_USE_MEMORY -- \n"); + fprintf(f, " mov rax, morph_memory_%zu\n", aop->id); + fprintf(f, " push rax\n"); }; break; case (AOT_CALL_FUNC): { @@ -397,6 +434,35 @@ int write_op(ast_op_t* aop, FILE* f, comp_state_t state) { return 0; } +int write_if_stat(ast_if_stat_t* if_stat, FILE* f, comp_state_t* state) { /* Writes the assembly for one if statement to f: the condition ops, the test and jump, the body, then the else or else-if branch. Labels are numbered from state->if_id. Always returns 0. */ + + size_t id = state->if_id++; + fprintf(f, " ; -- OP_IF \n"); + for (int i = 0; i < dynarray_length(if_stat->condition); i++) { + write_op(&if_stat->condition[i], f, state); + } + + fprintf(f, " pop rax\n"); + fprintf(f, " test rax, rax\n"); + fprintf(f, " jz morph_if_%zu_cond_false\n", id); + fprintf(f, " pop rax\n"); + fprintf(f, "morph_if_%zu_cond_true:\n", id); + for (int i = 0; i < dynarray_length(if_stat->body); i++) { + write_op(&if_stat->body[i], f, state); + } + fprintf(f, " jmp morph_if_%zu_end\n", id); + fprintf(f, "morph_if_%zu_cond_false:\n", id); + if (if_stat->else_body) { /* the else branch has its own op list; it must not replay the if body */ + for (int i = 0; i < dynarray_length(if_stat->else_body); i++) { + write_op(&if_stat->else_body[i], f, state); + } + } else if (if_stat->is_elseif) { + write_if_stat(if_stat->elseif, f, state); + } + fprintf(f, "morph_if_%zu_end:\n", id); + return 0; +} + int compile_x86_64_linux_nasm(args_t* args, program_t* prog) { FILE* f = fopen(args->asm_file, "w"); comp_state_t state = {0}; @@ -406,18 +472,22 @@ int compile_x86_64_linux_nasm(args_t* args, program_t* prog) { fprintf(f, "section .text\n"); fprintf(f, "global _start\n"); fprintf(f, "_start:\n"); + fprintf(f, " mov rax, [rsp]\n"); + fprintf(f, " mov [morph_i_argc], rax\n"); + fprintf(f, " mov rax, [rsp+8]\n"); + fprintf(f, " mov [morph_i_argv], rax\n"); fprintf(f, " call morph_f_main\n"); fprintf(f, " mov rax, 60\n"); fprintf(f, " mov rdi, 0\n"); fprintf(f, " syscall\n"); - - for (int i = 0; i < 
dynarray_length(prog->funcs); i++) { + size_t fn_len = dynarray_length(prog->funcs); + for (int i = 0; i < fn_len; i++) { function_t func = prog->funcs[i]; fprintf(f, "morph_f_%s:\n", func.name); for (int y = 0; y < dynarray_length(func.body); y++) { ast_op_t aop = func.body[y]; - if (write_op(&aop, f, state)) { + if (write_op(&aop, f, &state)) { return 1; } } @@ -425,6 +495,8 @@ int compile_x86_64_linux_nasm(args_t* args, program_t* prog) { } fprintf(f, "section .bss\n"); + fprintf(f, "morph_i_argc: resb 8\n"); + fprintf(f, "morph_i_argv: resb 8\n"); for (int i = 0; i < dynarray_length(prog->memories); i++) { memory_t mem = prog->memories[i]; fprintf(f, "morph_memory_%d: resb %zu ; Memory %s\n", i, mem.size, mem.name); @@ -443,7 +515,7 @@ int compile_x86_64_linux_nasm(args_t* args, program_t* prog) { }; break; case (TT_PUSH_CSTR): case (TT_PUSH_STR): { - fprintf(f, "db %s\n", v->val.str_v); + fprintf(f, "db \"%s\"\n", v->val.str_v); }; break; case (TT_PUSH_CHAR): fprintf(f, "db %c\n", v->val.char_v); diff --git a/src/main.c b/src/main.c index 43b7f9f..02a6c69 100644 --- a/src/main.c +++ b/src/main.c @@ -17,34 +17,17 @@ int main(int argc, const char** argv) { } char* str = read_to_string(args->input_files[0]); + if (!str) return 1; token_t* tokens = tokenise_string((char*)args->input_files[0], str); + if (!tokens) return 1; int token_count = dynarray_length(tokens); for (int i = 0; i < token_count; i++) { - break; - token_t tok = tokens[i]; - switch (tok.type) { - case (TT_KW): { - printf("TOK: KW %s\n", KW_LIST[tok.kw_type]); - } break; - case (TT_OP): { - printf("TOK: OP %s\n", OP_LIST[tok.op_type]); - } break; - case (TT_IDENT): { - printf("TOK: IDENT \"%s\"\n", tok.str_v); - } break; - case (TT_PUSH_INT): { - printf("TOK: PUSH_INT \"%zu\"\n", tok.int_v); - } break; - case (TT_PUSH_STR): { - printf("TOK: PUSH_STR \"%s\"\n", tok.str_v); - } break; - default: - break; - } + log_debug(&tokens[i].loc, "Token: %s", get_tok_str_dbg(&tokens[i])); } - program_t* ast = 
parse(tokens); + program_t* prog = parse(tokens); + if (!prog) return 1; pretty_print(tokens); - compile(args, ast); + compile(args, prog); return 0; } diff --git a/src/parser/parser.c b/src/parser/parser.c index 910e5ec..ba72b17 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -145,7 +145,11 @@ int _parse(parser_state_t* state) { dynarray_push(state->prog.memories, c); }; break; case (KW_FN): { - token_t name = *expect_token_type(state, TT_IDENT); + token_t* _name = expect_token_type(state, TT_IDENT); + if (!_name) { + return 1; + } + token_t name = *_name; const char** in_args = dynarray_create(const char*); const char** out_args = dynarray_create(const char*); token_t tmp_tok = {0}; @@ -162,6 +166,7 @@ int _parse(parser_state_t* state) { expect_token_type_ex(state, TT_KW, KW_DO); ast_op_t* body = parse_fnc_body(state); + if (!body) return 1; function_t fnc = {.loc = name.loc, .args = in_args, .return_args = out_args, .body=body, .name=name.str_v}; dynarray_push(state->prog.funcs, fnc); }; break; @@ -174,6 +179,7 @@ int _parse(parser_state_t* state) { int parse_item(parser_state_t* state, ast_op_t** body) { dynarray_pop(state->tokens, &state->curr_tok); + log_debug(&state->curr_tok.loc, "A: %s", get_tok_str_dbg(&state->curr_tok)); switch (state->curr_tok.type) { case (TT_IDENT): { size_t id = 0; @@ -202,12 +208,13 @@ int parse_item(parser_state_t* state, ast_op_t** body) { dynarray_push(*body, v); }; break; case (TT_KW): { + log_debug(NULL, "KW"); switch (state->curr_tok.kw_type) { case (KW_WHILE): { ast_op_t* condition = dynarray_create(ast_op_t); ast_op_t* whilebody = dynarray_create(ast_op_t); while (test_token_type_ex(state, TT_KW, KW_DO) == NULL) { - dynarray_pop(state->tokens, &state->curr_tok); + // dynarray_pop(state->tokens, &state->curr_tok); if (parse_item(state, &condition)) { return 1; } @@ -226,24 +233,29 @@ int parse_item(parser_state_t* state, ast_op_t** body) { dynarray_push(body, v); }; break; case (KW_IF): { + log_debug(NULL, 
"KW_IF"); ast_op_t* condition = dynarray_create(ast_op_t); ast_op_t* ifbody = dynarray_create(ast_op_t); ast_op_t* else_body = dynarray_create(ast_op_t); bool is_else = false; bool is_elseif = false; - while (test_token_type_ex(state, TT_KW, KW_DO) == NULL) { - dynarray_pop(state->tokens, &state->curr_tok); + token_t* t; + while (dynarray_length(state->tokens)) { + if (test_token_type_ex(state, TT_KW, KW_DO)) { + dynarray_pop(state->tokens, NULL); + break; + } if (parse_item(state, &condition)) { + log_error(NULL, "WAH"); return 1; } } - assert(expect_token_type_ex(state, TT_KW, KW_DO)); - while (test_token_type_ex(state, TT_KW, KW_END) == NULL) { - dynarray_pop(state->tokens, &state->curr_tok); - - if (parse_item(state, &ifbody)) { - return 1; + while (dynarray_length(state->tokens)) { + if (test_token_type_ex(state, TT_KW, KW_END)) { + dynarray_pop(state->tokens, NULL); + break; } + if (test_token_type_ex(state, TT_KW, KW_ELSE)) { assert(expect_token_type_ex(state, TT_KW, KW_ELSE)); is_else = true; @@ -253,33 +265,40 @@ int parse_item(parser_state_t* state, ast_op_t** body) { break; } } + + if (parse_item(state, &ifbody)) { + return 1; + } } if (is_else) { - while (test_token_type_ex(state, TT_KW, KW_END) == NULL) { - dynarray_pop(state->tokens, &state->curr_tok); + while (dynarray_length(state->tokens)) { + if (test_token_type_ex(state, TT_KW, KW_END)) { + dynarray_pop(state->tokens, NULL); + break; + } if (parse_item(state, &else_body)) { return 1; } } - assert(expect_token_type_ex(state, TT_KW, KW_END)); + ast_if_stat_t is = {.body = ifbody, .is_elseif = false, .condition = condition, .else_body = else_body}; ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_IF, .if_stat = is}; - dynarray_push(body, v); + dynarray_push(*body, v); } else if (is_elseif) { ast_op_t* tmp_body = dynarray_create(ast_op_t); - parse_item(state, &tmp_body); + if (parse_item(state, &tmp_body)) return 1; ast_if_stat_t* elif_branch = malloc(sizeof(ast_if_stat_t)); memcpy(elif_branch, 
&tmp_body[0].if_stat, sizeof(ast_if_stat_t)); ast_if_stat_t is = {.body = ifbody, .is_elseif = true, .condition = condition, .elseif = elif_branch}; ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_IF, .if_stat = is}; - dynarray_push(body, v); + dynarray_push(*body, v); } else { assert(expect_token_type_ex(state, TT_KW, KW_END)); ast_if_stat_t is = {.body = ifbody, .condition = condition}; ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_IF, .if_stat = is}; - dynarray_push(body, v); + dynarray_push(*body, v); } }; break; @@ -302,5 +321,6 @@ ast_op_t* parse_fnc_body(parser_state_t* state) { return NULL; } } + dynarray_pop(state->tokens, NULL); return body; } diff --git a/src/token.c b/src/token.c index 3284eee..24043a5 100644 --- a/src/token.c +++ b/src/token.c @@ -17,7 +17,7 @@ const char* OP_LIST[] = { [OP_LT] = "lt", [OP_GE] = "ge", [OP_LE] = "le", - [OP_NE] = "ne", + [OP_NE] = "neq", [OP_SHR] = "shr", [OP_SHL] = "shl", [OP_BOR] = "bor", diff --git a/src/tokeniser.c b/src/tokeniser.c index 270744a..4a0d24f 100644 --- a/src/tokeniser.c +++ b/src/tokeniser.c @@ -62,12 +62,25 @@ int parse_int(const char* s, int* out) { token_t* tokenise_string(char* file_path, char* str) { const int str_len = strlen(str); loc_t loc = {0}; + loc.col=1; + loc.line=1; loc.file = file_path; token_t* tokens = dynarray_create(token_t); for (int i = 0; i < str_len; i++) { switch (str[i]) { + case ('/'): { + if (str[i + 1] == '/') { + while (i < str_len && str[i] != '\n') { + i += 1; + } + loc.col = 1; + loc.line += 1; + } else { + goto tokenise_ident; + } + }; break; case (' '): case ('\t'): case ('\r'): { @@ -75,11 +88,12 @@ token_t* tokenise_string(char* file_path, char* str) { continue; } case ('\n'): { - loc.col = 0; + loc.col = 1; loc.line += 1; continue; } case ('\''): { + loc_t cloc = loc; char* buf = malloc(32); bool escaped = false; for (int y = 0;;) { @@ -125,12 +139,13 @@ token_t* tokenise_string(char* file_path, char* str) { token_t tok = { .type = TT_PUSH_CHAR, 
.str_v = buf, - .loc = loc, + .loc = cloc, }; dynarray_push(tokens, tok); } break; case ('c'): case ('"'): { + loc_t cloc = loc; bool is_cstr = false; if (str[i] == 'c') { if (str[i + 1] != '"') { @@ -142,7 +157,7 @@ token_t* tokenise_string(char* file_path, char* str) { } char* buf = malloc(1024 * 4); bool escaped = false; - i+=1; + i += 1; for (int y = 0;;) { if (str[i] == '"' && !escaped) { break; @@ -165,25 +180,24 @@ token_t* tokenise_string(char* file_path, char* str) { token_t tok = { .type = is_cstr ? TT_PUSH_CSTR : TT_PUSH_STR, .str_v = buf, - .loc = loc, + .loc = cloc, }; dynarray_push(tokens, tok); } break; default: { tokenise_ident: + loc_t cloc = loc; // TODO: manage memory better // INFO: If you make an ident longer than 4kb i will murder you char* buf = malloc(1024 * 4); memset(buf, 0, 1024 * 4); int buf_counter = 0; - while (i < str_len) { - if (str[i] == ' ' || str[i] == '\t' || str[i] == '\r' || str[i] == '\n') { - break; - } + while (i < str_len && !(str[i] == ' ' || str[i] == '\t' || str[i] == '\r' || str[i] == '\n')) { buf[buf_counter++] = str[i]; loc.col += 1; i += 1; } + i -= 1; // adjust for the i++ on next loop buf[buf_counter + 1] = '\0'; // PERF: I dont know if this makes it faster or slower, need 2 check buf = realloc(buf, strlen(buf) + 1); @@ -194,7 +208,7 @@ token_t* tokenise_string(char* file_path, char* str) { token_t tok = { .type = TT_PUSH_INT, .int_v = num, - .loc = loc, + .loc = cloc, }; dynarray_push(tokens, tok); goto loop_end; @@ -204,6 +218,7 @@ token_t* tokenise_string(char* file_path, char* str) { token_t tok = { .type = TT_KW, .kw_type = (kw_type_t)i, + .loc = cloc, }; dynarray_push(tokens, tok); goto loop_end; @@ -215,7 +230,7 @@ token_t* tokenise_string(char* file_path, char* str) { token_t tok = { .type = TT_OP, .op_type = (op_type_t)i, - .loc = loc, + .loc = cloc, }; dynarray_push(tokens, tok); goto loop_end; @@ -225,7 +240,7 @@ token_t* tokenise_string(char* file_path, char* str) { token_t tok = { .type = TT_IDENT, 
.str_v = strdup(buf), - .loc = loc, + .loc = cloc, }; dynarray_push(tokens, tok); loop_end: diff --git a/test.mrph b/test.mrph index fe59f47..d67e820 100644 --- a/test.mrph +++ b/test.mrph @@ -1,9 +1,14 @@ - +// vim: set ft=mclang: const test 1 end fn main with void returns int do - 34 35 add __print__ - - "hewo world" + if 1 2 eq do + 1 + else if 3 4 neq do + 2 + else + 3 + end + __print__ end