This commit is contained in:
Gvidas Juknevičius 2025-07-08 21:53:08 +03:00
parent 7433808320
commit 23d7e86c2c
Signed by: MCorange
GPG Key ID: 5BE6B533CB76FE86
7 changed files with 162 additions and 97 deletions

30
a.s
View File

@ -1,30 +0,0 @@
; Generated NASM output for a small program: an entry point that calls
; morph_f_main and then exits, plus the read-only data the program references.
bits 64
section .text
global _start
; Process entry point: run morph_f_main, then issue syscall 60 with rdi = 0
; (60 is the exit syscall number on x86-64 Linux, so this exits with status 0).
_start:
call morph_f_main
mov rax, 60
mov rdi, 0
syscall
; Body of the compiled `main` function. Values are kept on the machine stack.
; NOTE(review): three qwords (the sum, the string address, the string length)
; are still on the stack when `ret` executes, so `ret` would appear to pop the
; value 10 as its return address - confirm how callers are expected to
; balance the stack.
morph_f_main:
; -- PUSH_INT --
mov rax, 34
push rax
; -- PUSH_INT --
mov rax, 35
push rax
; -- OP_ADD --
; Pops both operands, adds them, pushes the result.
pop rax
pop rbx
add rax, rbx
push rax
; -- PUSH_STR --
; Pushes the string's address first, then its length (10 bytes).
mov rax, morph_str_0
push rax
mov rax, 10
push rax
ret
section .bss
section .rodata
; String literal data; no terminating NUL byte is emitted.
morph_str_0: db "hewo world"
; 8-byte constant holding 1; nothing in this file references it.
morph_const_0: dq 1

View File

@ -1,6 +1,7 @@
#include "argparse.h"
#include "dynarray.h"
#include "logger.h"
#include "parser/ast.h"
#include "token.h"
#include <stdio.h>
@ -8,9 +9,13 @@
/*
 * Mutable state shared by the code-generation routines while one program is
 * being written out as assembly.
 */
typedef struct comp_state_s comp_state_t;

struct comp_state_s {
    /* String literals collected while emitting ops; the array length at the
     * time of insertion is used as the numeric suffix of the string's label. */
    char** strings;
    /* Next unused id for `if` labels (morph_if_<id>_...); post-incremented
     * once per emitted if statement. */
    size_t if_id;
    /* Next unused id for `while` labels (morph_while_<id>_...); post-incremented
     * once per emitted while statement. */
    size_t while_id;
};
int write_op(ast_op_t* aop, FILE* f, comp_state_t state) {
int write_if_stat(ast_if_stat_t* if_stat, FILE* f, comp_state_t* state);
int write_op(ast_op_t* aop, FILE* f, comp_state_t* state) {
switch (aop->type) {
case (AOT_OP): {
@ -23,17 +28,17 @@ int write_op(ast_op_t* aop, FILE* f, comp_state_t state) {
}; break;
case (TT_PUSH_STR): {
fprintf(f, " ; -- PUSH_STR --\n");
fprintf(f, " mov rax, morph_str_%zu\n", dynarray_length(state.strings));
fprintf(f, " mov rax, morph_str_%zu\n", dynarray_length(state->strings));
fprintf(f, " push rax\n");
fprintf(f, " mov rax, %zu\n", strlen(op.str_v));
fprintf(f, " push rax\n");
dynarray_push(state.strings, op.str_v);
dynarray_push(state->strings, op.str_v);
}; break;
case (TT_PUSH_CSTR): {
fprintf(f, " ; -- PUSH_CSTR --\n");
fprintf(f, " mov rax, morph_str_%zu\n", dynarray_length(state.strings));
fprintf(f, " mov rax, morph_str_%zu\n", dynarray_length(state->strings));
fprintf(f, " push rax\n");
dynarray_push(state.strings, op.str_v);
dynarray_push(state->strings, op.str_v);
}; break;
case (TT_PUSH_BOOL): {
fprintf(f, " ; -- PUSH_BOOL --\n");
@ -51,6 +56,13 @@ int write_op(ast_op_t* aop, FILE* f, comp_state_t state) {
}; break;
case (TT_OP): {
switch (op.op_type) {
case (OP_CAST_BOOL):
case (OP_CAST_INT):
case (OP_CAST_PTR):
case (OP_COUNT__):
case (OP_NONE):
break;
case (OP_ADD): {
fprintf(f, " ; -- OP_ADD --\n");
fprintf(f, " pop rax\n");
@ -366,11 +378,11 @@ int write_op(ast_op_t* aop, FILE* f, comp_state_t state) {
case (OP_HERE): {
char* pos = malloc(1024 * 2);
snprintf(pos, 2048, "%s:%d:%d", op.loc.file, op.loc.line, op.loc.col);
fprintf(f, " mov rax, str_%zu\n", dynarray_length(state.strings));
fprintf(f, " mov rax, str_%zu\n", dynarray_length(state->strings));
fprintf(f, " push rax\n");
fprintf(f, " mov rax, %zu\n", strlen(op.str_v));
fprintf(f, " push rax\n");
dynarray_push(state.strings, op.str_v);
dynarray_push(state->strings, op.str_v);
}; break;
case (OP_PRINT): {
}; break;
@ -378,17 +390,42 @@ int write_op(ast_op_t* aop, FILE* f, comp_state_t state) {
}; break;
case (TT_KW):
case (TT_IDENT): {
log_warn(&op.loc, "Found a KW or IDENT where it shouldnt exists (compiler): %s", get_tok_str_dbg(&op));
// unreachable
} break;
}
}
case (AOT_IF): {
write_if_stat(&aop->if_stat, f, state);
}; break;
case (AOT_WHILE): {
size_t id = state->while_id++;
fprintf(f, " ;; -- OP_WHILE -- \n");
fprintf(f, "morph_while_%zu_cond:\n", id);
for (int i = 0; i < dynarray_length(aop->while_stat.condition); i++) {
write_op(&aop->while_stat.condition[i], f, state);
}
fprintf(f, " pop rax\n");
fprintf(f, " test rax, rax\n");
fprintf(f, " jz morph_while_%zu_end\n", id);
fprintf(f, " pop rax\n");
fprintf(f, "morph_while_%zu_start:\n", id);
for (int i = 0; i < dynarray_length(aop->while_stat.body); i++) {
write_op(&aop->while_stat.body[i], f, state);
}
fprintf(f, " jmp morph_while_%zu_cond:\n", id);
fprintf(f, "morph_while_%zu_end:\n", id);
}; break;
case (AOT_USE_CONST): {
fprintf(f, " ;; -- OP_USE_CONST -- \n");
fprintf(f, " mov rax, [morph_const_%zu]\n", aop->id);
fprintf(f, " push rax\n");
}; break;
case (AOT_USE_MEMORY): {
fprintf(f, " ;; -- OP_USE_MEMORY -- \n");
fprintf(f, " mov rax, morph_memory_%zu\n", aop->id);
fprintf(f, " push rax\n");
}; break;
case (AOT_CALL_FUNC): {
@ -397,6 +434,35 @@ int write_op(ast_op_t* aop, FILE* f, comp_state_t state) {
return 0;
}
int write_if_stat(ast_if_stat_t* if_stat, FILE* f, comp_state_t* state) {
size_t id = state->if_id++;
fprintf(f, " ; -- OP_IF \n");
for (int i = 0; i < dynarray_length(if_stat->condition); i++) {
write_op(&if_stat->condition[i], f, state);
}
fprintf(f, " pop rax\n");
fprintf(f, " test rax, rax\n");
fprintf(f, " jz morph_if_%zu_cond_false\n", id);
fprintf(f, " pop rax\n");
fprintf(f, "morph_if_%zu_cond_true:\n", id);
for (int i = 0; i < dynarray_length(if_stat->body); i++) {
write_op(&if_stat->body[i], f, state);
}
fprintf(f, " jmp morph_if_%zu_end\n", id);
fprintf(f, "morph_if_%zu_cond_false:\n", id);
if (if_stat->else_body) {
for (int i = 0; i < dynarray_length(if_stat->body); i++) {
write_op(&if_stat->body[i], f, state);
}
} else if (if_stat->is_elseif) {
write_if_stat(if_stat->elseif, f, state);
}
fprintf(f, "morph_if_%zu_end:\n", id);
return 0;
}
int compile_x86_64_linux_nasm(args_t* args, program_t* prog) {
FILE* f = fopen(args->asm_file, "w");
comp_state_t state = {0};
@ -406,18 +472,22 @@ int compile_x86_64_linux_nasm(args_t* args, program_t* prog) {
fprintf(f, "section .text\n");
fprintf(f, "global _start\n");
fprintf(f, "_start:\n");
fprintf(f, " mov rax, [rsp]\n");
fprintf(f, " mov [morph_i_argc], rax\n");
fprintf(f, " mov rax, [rsp+8]\n");
fprintf(f, " mov [morph_i_argv], rax\n");
fprintf(f, " call morph_f_main\n");
fprintf(f, " mov rax, 60\n");
fprintf(f, " mov rdi, 0\n");
fprintf(f, " syscall\n");
for (int i = 0; i < dynarray_length(prog->funcs); i++) {
size_t fn_len = dynarray_length(prog->funcs);
for (int i = 0; i < fn_len; i++) {
function_t func = prog->funcs[i];
fprintf(f, "morph_f_%s:\n", func.name);
for (int y = 0; y < dynarray_length(func.body); y++) {
ast_op_t aop = func.body[y];
if (write_op(&aop, f, state)) {
if (write_op(&aop, f, &state)) {
return 1;
}
}
@ -425,6 +495,8 @@ int compile_x86_64_linux_nasm(args_t* args, program_t* prog) {
}
fprintf(f, "section .bss\n");
fprintf(f, "morph_i_argc: resb 8\n");
fprintf(f, "morph_i_argv: resb 8\n");
for (int i = 0; i < dynarray_length(prog->memories); i++) {
memory_t mem = prog->memories[i];
fprintf(f, "morph_memory_%d: resb %zu ; Memory %s\n", i, mem.size, mem.name);
@ -443,7 +515,7 @@ int compile_x86_64_linux_nasm(args_t* args, program_t* prog) {
}; break;
case (TT_PUSH_CSTR):
case (TT_PUSH_STR): {
fprintf(f, "db %s\n", v->val.str_v);
fprintf(f, "db \"%s\"\n", v->val.str_v);
}; break;
case (TT_PUSH_CHAR):
fprintf(f, "db %c\n", v->val.char_v);

View File

@ -17,34 +17,17 @@ int main(int argc, const char** argv) {
}
char* str = read_to_string(args->input_files[0]);
if (!str) return 1;
token_t* tokens = tokenise_string((char*)args->input_files[0], str);
if (!tokens) return 1;
int token_count = dynarray_length(tokens);
for (int i = 0; i < token_count; i++) {
break;
token_t tok = tokens[i];
switch (tok.type) {
case (TT_KW): {
printf("TOK: KW %s\n", KW_LIST[tok.kw_type]);
} break;
case (TT_OP): {
printf("TOK: OP %s\n", OP_LIST[tok.op_type]);
} break;
case (TT_IDENT): {
printf("TOK: IDENT \"%s\"\n", tok.str_v);
} break;
case (TT_PUSH_INT): {
printf("TOK: PUSH_INT \"%zu\"\n", tok.int_v);
} break;
case (TT_PUSH_STR): {
printf("TOK: PUSH_STR \"%s\"\n", tok.str_v);
} break;
default:
break;
log_debug(&tokens[i].loc, "Token: %s", get_tok_str_dbg(&tokens[i]));
}
}
program_t* ast = parse(tokens);
program_t* prog = parse(tokens);
if (!prog) return 1;
pretty_print(tokens);
compile(args, ast);
compile(args, prog);
return 0;
}

View File

@ -145,7 +145,11 @@ int _parse(parser_state_t* state) {
dynarray_push(state->prog.memories, c);
}; break;
case (KW_FN): {
token_t name = *expect_token_type(state, TT_IDENT);
token_t* _name = expect_token_type(state, TT_IDENT);
if (!_name) {
return 1;
}
token_t name = *_name;
const char** in_args = dynarray_create(const char*);
const char** out_args = dynarray_create(const char*);
token_t tmp_tok = {0};
@ -162,6 +166,7 @@ int _parse(parser_state_t* state) {
expect_token_type_ex(state, TT_KW, KW_DO);
ast_op_t* body = parse_fnc_body(state);
if (!body) return 1;
function_t fnc = {.loc = name.loc, .args = in_args, .return_args = out_args, .body=body, .name=name.str_v};
dynarray_push(state->prog.funcs, fnc);
}; break;
@ -174,6 +179,7 @@ int _parse(parser_state_t* state) {
int parse_item(parser_state_t* state, ast_op_t** body) {
dynarray_pop(state->tokens, &state->curr_tok);
log_debug(&state->curr_tok.loc, "A: %s", get_tok_str_dbg(&state->curr_tok));
switch (state->curr_tok.type) {
case (TT_IDENT): {
size_t id = 0;
@ -202,12 +208,13 @@ int parse_item(parser_state_t* state, ast_op_t** body) {
dynarray_push(*body, v);
}; break;
case (TT_KW): {
log_debug(NULL, "KW");
switch (state->curr_tok.kw_type) {
case (KW_WHILE): {
ast_op_t* condition = dynarray_create(ast_op_t);
ast_op_t* whilebody = dynarray_create(ast_op_t);
while (test_token_type_ex(state, TT_KW, KW_DO) == NULL) {
dynarray_pop(state->tokens, &state->curr_tok);
// dynarray_pop(state->tokens, &state->curr_tok);
if (parse_item(state, &condition)) {
return 1;
}
@ -226,24 +233,29 @@ int parse_item(parser_state_t* state, ast_op_t** body) {
dynarray_push(body, v);
}; break;
case (KW_IF): {
log_debug(NULL, "KW_IF");
ast_op_t* condition = dynarray_create(ast_op_t);
ast_op_t* ifbody = dynarray_create(ast_op_t);
ast_op_t* else_body = dynarray_create(ast_op_t);
bool is_else = false;
bool is_elseif = false;
while (test_token_type_ex(state, TT_KW, KW_DO) == NULL) {
dynarray_pop(state->tokens, &state->curr_tok);
token_t* t;
while (dynarray_length(state->tokens)) {
if (test_token_type_ex(state, TT_KW, KW_DO)) {
dynarray_pop(state->tokens, NULL);
break;
}
if (parse_item(state, &condition)) {
log_error(NULL, "WAH");
return 1;
}
}
assert(expect_token_type_ex(state, TT_KW, KW_DO));
while (test_token_type_ex(state, TT_KW, KW_END) == NULL) {
dynarray_pop(state->tokens, &state->curr_tok);
while (dynarray_length(state->tokens)) {
if (test_token_type_ex(state, TT_KW, KW_END)) {
dynarray_pop(state->tokens, NULL);
break;
}
if (parse_item(state, &ifbody)) {
return 1;
}
if (test_token_type_ex(state, TT_KW, KW_ELSE)) {
assert(expect_token_type_ex(state, TT_KW, KW_ELSE));
is_else = true;
@ -253,33 +265,40 @@ int parse_item(parser_state_t* state, ast_op_t** body) {
break;
}
}
if (parse_item(state, &ifbody)) {
return 1;
}
}
if (is_else) {
while (test_token_type_ex(state, TT_KW, KW_END) == NULL) {
dynarray_pop(state->tokens, &state->curr_tok);
while (dynarray_length(state->tokens)) {
if (test_token_type_ex(state, TT_KW, KW_END)) {
dynarray_pop(state->tokens, NULL);
break;
}
if (parse_item(state, &else_body)) {
return 1;
}
}
assert(expect_token_type_ex(state, TT_KW, KW_END));
ast_if_stat_t is = {.body = ifbody, .is_elseif = false, .condition = condition, .else_body = else_body};
ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_IF, .if_stat = is};
dynarray_push(body, v);
dynarray_push(*body, v);
} else if (is_elseif) {
ast_op_t* tmp_body = dynarray_create(ast_op_t);
parse_item(state, &tmp_body);
if (parse_item(state, &tmp_body)) return 1;
ast_if_stat_t* elif_branch = malloc(sizeof(ast_if_stat_t));
memcpy(elif_branch, &tmp_body[0].if_stat, sizeof(ast_if_stat_t));
ast_if_stat_t is = {.body = ifbody, .is_elseif = true, .condition = condition, .elseif = elif_branch};
ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_IF, .if_stat = is};
dynarray_push(body, v);
dynarray_push(*body, v);
} else {
assert(expect_token_type_ex(state, TT_KW, KW_END));
ast_if_stat_t is = {.body = ifbody, .condition = condition};
ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_IF, .if_stat = is};
dynarray_push(body, v);
dynarray_push(*body, v);
}
}; break;
@ -302,5 +321,6 @@ ast_op_t* parse_fnc_body(parser_state_t* state) {
return NULL;
}
}
dynarray_pop(state->tokens, NULL);
return body;
}

View File

@ -17,7 +17,7 @@ const char* OP_LIST[] = {
[OP_LT] = "lt",
[OP_GE] = "ge",
[OP_LE] = "le",
[OP_NE] = "ne",
[OP_NE] = "neq",
[OP_SHR] = "shr",
[OP_SHL] = "shl",
[OP_BOR] = "bor",

View File

@ -62,12 +62,25 @@ int parse_int(const char* s, int* out) {
token_t* tokenise_string(char* file_path, char* str) {
const int str_len = strlen(str);
loc_t loc = {0};
loc.col=1;
loc.line=1;
loc.file = file_path;
token_t* tokens = dynarray_create(token_t);
for (int i = 0; i < str_len; i++) {
switch (str[i]) {
case ('/'): {
if (str[i + 1] == '/') {
while (i < str_len && str[i] != '\n') {
i += 1;
}
loc.col = 1;
loc.line += 1;
} else {
goto tokenise_ident;
}
}; break;
case (' '):
case ('\t'):
case ('\r'): {
@ -75,11 +88,12 @@ token_t* tokenise_string(char* file_path, char* str) {
continue;
}
case ('\n'): {
loc.col = 0;
loc.col = 1;
loc.line += 1;
continue;
}
case ('\''): {
loc_t cloc = loc;
char* buf = malloc(32);
bool escaped = false;
for (int y = 0;;) {
@ -125,12 +139,13 @@ token_t* tokenise_string(char* file_path, char* str) {
token_t tok = {
.type = TT_PUSH_CHAR,
.str_v = buf,
.loc = loc,
.loc = cloc,
};
dynarray_push(tokens, tok);
} break;
case ('c'):
case ('"'): {
loc_t cloc = loc;
bool is_cstr = false;
if (str[i] == 'c') {
if (str[i + 1] != '"') {
@ -142,7 +157,7 @@ token_t* tokenise_string(char* file_path, char* str) {
}
char* buf = malloc(1024 * 4);
bool escaped = false;
i+=1;
i += 1;
for (int y = 0;;) {
if (str[i] == '"' && !escaped) {
break;
@ -165,25 +180,24 @@ token_t* tokenise_string(char* file_path, char* str) {
token_t tok = {
.type = is_cstr ? TT_PUSH_CSTR : TT_PUSH_STR,
.str_v = buf,
.loc = loc,
.loc = cloc,
};
dynarray_push(tokens, tok);
} break;
default: {
tokenise_ident:
loc_t cloc = loc;
// TODO: manage memory better
// INFO: If you make an ident longer than 4kb i will murder you
char* buf = malloc(1024 * 4);
memset(buf, 0, 1024 * 4);
int buf_counter = 0;
while (i < str_len) {
if (str[i] == ' ' || str[i] == '\t' || str[i] == '\r' || str[i] == '\n') {
break;
}
while (i < str_len && !(str[i] == ' ' || str[i] == '\t' || str[i] == '\r' || str[i] == '\n')) {
buf[buf_counter++] = str[i];
loc.col += 1;
i += 1;
}
i -= 1; // adjust for the i++ on next loop
buf[buf_counter + 1] = '\0';
// PERF: I dont know if this makes it faster or slower, need 2 check
buf = realloc(buf, strlen(buf) + 1);
@ -194,7 +208,7 @@ token_t* tokenise_string(char* file_path, char* str) {
token_t tok = {
.type = TT_PUSH_INT,
.int_v = num,
.loc = loc,
.loc = cloc,
};
dynarray_push(tokens, tok);
goto loop_end;
@ -204,6 +218,7 @@ token_t* tokenise_string(char* file_path, char* str) {
token_t tok = {
.type = TT_KW,
.kw_type = (kw_type_t)i,
.loc = cloc,
};
dynarray_push(tokens, tok);
goto loop_end;
@ -215,7 +230,7 @@ token_t* tokenise_string(char* file_path, char* str) {
token_t tok = {
.type = TT_OP,
.op_type = (op_type_t)i,
.loc = loc,
.loc = cloc,
};
dynarray_push(tokens, tok);
goto loop_end;
@ -225,7 +240,7 @@ token_t* tokenise_string(char* file_path, char* str) {
token_t tok = {
.type = TT_IDENT,
.str_v = strdup(buf),
.loc = loc,
.loc = cloc,
};
dynarray_push(tokens, tok);
loop_end:

View File

@ -1,9 +1,14 @@
// vim: set ft=mclang:
const test 1 end
fn main with void returns int do
34 35 add __print__
"hewo world"
if 1 2 eq do
1
else if 3 4 neq do
2
else
3
end
__print__
end