From e59f6a31ac639eb06685c47dcfabe8d9eaca21af Mon Sep 17 00:00:00 2001 From: MCorange Date: Mon, 7 Jul 2025 21:33:22 +0300 Subject: [PATCH] Outputing assembly now, :3 --- .vscode/launch.json | 17 + Makefile | 2 +- a.s | 23 ++ src/argparse.c | 128 +++++++ src/compiler/compiler.c | 18 + src/compiler/targets/x86_64-linux-nasm.c | 449 +++++++++++++++++++++++ src/dynarray.c | 38 +- src/include/argparse.h | 23 ++ src/include/compiler.h | 13 + src/include/dynarray.h | 10 +- src/include/parser/ast.h | 33 +- src/include/parser/parser.h | 2 +- src/include/parser/precomp.h | 3 +- src/include/path_utils.h | 6 + src/include/prettyprint.h | 21 ++ src/include/token.h | 5 +- src/main.c | 25 +- src/parser/parser.c | 253 ++++++++++++- src/parser/precomp.c | 318 +++++++--------- src/parser/tokcmp.c | 9 +- src/path_utils.c | 39 ++ src/prettyprint.c | 15 + src/token.c | 24 +- src/tokeniser.c | 9 +- test.mrph | 4 +- 25 files changed, 1222 insertions(+), 265 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 a.s create mode 100644 src/argparse.c create mode 100644 src/compiler/compiler.c create mode 100644 src/compiler/targets/x86_64-linux-nasm.c create mode 100644 src/include/argparse.h create mode 100644 src/include/compiler.h create mode 100644 src/include/path_utils.h create mode 100644 src/include/prettyprint.h create mode 100644 src/path_utils.c create mode 100644 src/prettyprint.c diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..49267e3 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,17 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "by-gdb", + "request": "launch", + "name": "Launch(gdb)", + "program": "${workspaceFolder}/build/morph", + "cwd": "${workspaceRoot}", + "programArgs": "./test.mrph", + "commandsBeforeExec": ["make -B"] + } + ] +} \ No newline at end of file diff --git a/Makefile b/Makefile index 0c632fc..0bfcc4a 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ LD=clang AR=ar cxx_sources=$(wildcard src/*.cpp) -c_sources=$(wildcard src/*.c) +c_sources=$(wildcard src/*.c) $(wildcard src/*/*.c) $(wildcard src/*/*/*.c) objects=$(patsubst src/%.cpp,$(BUILD_DIR)/obj/%.cpp.o,$(cxx_sources)) $(patsubst src/%.c,$(BUILD_DIR)/obj/%.c.o,$(c_sources)) diff --git a/a.s b/a.s new file mode 100644 index 0000000..5a934f4 --- /dev/null +++ b/a.s @@ -0,0 +1,23 @@ +bits 64 +section .text +global _start +_start: + call morph_f_main + mov rax, 60 + mov rdi, 0 + syscall +morph_f_main: + ; -- OP_ADD -- + pop rax + pop rbx + add rax, rbx + push rax + mov rax, morph_str_0 + push rax + mov rax, 10 + push rax + ret +section .bss +section .rodata +morph_str_0: db "hewo world" +morph_const_0: dq 1 diff --git a/src/argparse.c b/src/argparse.c new file mode 100644 index 0000000..ccec9d5 --- /dev/null +++ b/src/argparse.c @@ -0,0 +1,128 @@ +#include "dynarray.h" +#include "path_utils.h" +#include +#include +#include +#include +#include + +void show_help(const char* prog) { + printf("Usage: %s [flags] [input]\n", prog); + printf("Flags:\n"); + printf(" -h, --help - Show this help\n"); + printf(" -c, --compile - Compile only, dont link\n"); + printf(" -o, --output [file] - Output file\n"); + printf(" -I, --include [path] - Output file\n"); + printf(" -t, --target [target] - Compile target [default: %s]\n", DEFAULT_TARGET); + printf(" Available targets:\n"); + for (int y = 0; y < AVAILABLE_TARGET_COUNT; y++) { + printf(" - %s\n", AVAILABLE_TARGETS[y]); + } +} + +void 
show_quick_help(const char* prog) { + printf("Usage: %s [flags] [input]\n", prog); + printf("Run '%s --help' for more info.\n", prog); +} +/* Parses argv into a heap-allocated args_t; returns NULL after printing help or an error. */ +args_t* parse_args(int argc, const char** argv) { + args_t* args = calloc(1, sizeof(args_t)); if (!args) { return NULL; } /* zeroed so target/output_file start NULL and comp_mode is the default */ + args->include_paths = dynarray_create(char*); + args->input_files = dynarray_create(char*); + + if (argc < 2) { + show_quick_help(argv[0]); + return NULL; + } + int i = 1; + for (; i < argc; i++) { + const char* arg = argv[i]; + int arg_len = strlen(arg); + if (strcmp(arg, "-") == 0) { + i += 1; + break; + } + if (arg[0] != '-') { + break; + } + + if (strcmp(arg, "-h") == 0 || strcmp(arg, "--help") == 0) { + show_help(argv[0]); + return NULL; + } else if (strcmp(arg, "-o") == 0 || strcmp(arg, "--output") == 0) { + if (args->output_file) { + printf("ERROR: --output already set\n"); + show_quick_help(argv[0]); + return NULL; + } + if (argc <= i + 1) { + printf("ERROR: --output requires a file argument\n"); + show_quick_help(argv[0]); + return NULL; + } + args->output_file = (char*)argv[++i]; /* point at argv; there is no buffer to strcpy into */ + } else if (strcmp(arg, "-I") == 0 || strcmp(arg, "--include") == 0) { + if (argc <= i + 1) { + printf("ERROR: --include requires a path argument\n"); + show_quick_help(argv[0]); + return NULL; + } + dynarray_push(args->include_paths, argv[++i]); + } else if (strcmp(arg, "-t") == 0 || strcmp(arg, "--target") == 0) { + if (args->target) { + printf("ERROR: --target already set\n"); + show_quick_help(argv[0]); + return NULL; + } + if (argc <= i + 1) { + printf("ERROR: --target requires a string argument\n"); + show_quick_help(argv[0]); + return NULL; + } + for (int y = 0; y < AVAILABLE_TARGET_COUNT; y++) { + if (strcmp(AVAILABLE_TARGETS[y], argv[i + 1]) == 0) { + args->target = (char*)AVAILABLE_TARGETS[y]; + break; + } + } + if (args->target) { i += 1; continue; } /* known target: consume its value and keep parsing */ + printf("ERROR: The target '%s' does not exists\n", argv[i + 1]); + show_quick_help(argv[0]); + return NULL; + } else if (strcmp(arg, "-c") == 0 || strcmp(arg, "--compile") == 0) { + args->comp_mode = CM_COMPILE; + } else { + printf("ERROR: 
Unknown argument: %s", arg); + show_quick_help(argv[0]); + return NULL; + } + } + + for (; i < argc; i++) { + dynarray_push(args->input_files, argv[i]); + } + if (dynarray_length(args->input_files) < 1) { + show_quick_help(argv[0]); + return NULL; + } + + // TODO: Fix this + if (dynarray_length(args->input_files) > 1) { + printf("ERROR: Currently only one input file is supported\n"); + show_quick_help(argv[0]); + return NULL; + } + + // defaults + if (!args->target) { + args->target = (char*)DEFAULT_TARGET; + } + if (!args->output_file) { + args->output_file = "a.out"; + } + + // autogen + args->asm_file = replace_extension(args->output_file, "s"); + args->obj_file = replace_extension(args->output_file, "o"); + return args; +} diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c new file mode 100644 index 0000000..b9d26f0 --- /dev/null +++ b/src/compiler/compiler.c @@ -0,0 +1,18 @@ +#include "argparse.h" +#include "parser/ast.h" +#include +#include + +// TODO: Change this based on the host pc +const char* DEFAULT_TARGET = "x86_64-linux-nasm"; +const char* AVAILABLE_TARGETS[] = {"x86_64-linux-nasm"}; +size_t AVAILABLE_TARGET_COUNT = sizeof(AVAILABLE_TARGETS) / sizeof(AVAILABLE_TARGETS[0]); + +int compile_x86_64_linux_nasm(args_t* args, program_t* prog); + +int compile(args_t* args, program_t* prog) { + if (strcmp(args->target, "x86_64-linux-nasm") == 0) { + compile_x86_64_linux_nasm(args, prog); + } + return 0; +} diff --git a/src/compiler/targets/x86_64-linux-nasm.c b/src/compiler/targets/x86_64-linux-nasm.c new file mode 100644 index 0000000..1e4829e --- /dev/null +++ b/src/compiler/targets/x86_64-linux-nasm.c @@ -0,0 +1,449 @@ + +#include "argparse.h" +#include "dynarray.h" +#include "parser/ast.h" +#include "token.h" +#include +#include + +typedef struct comp_state_s { + char** strings; +} comp_state_t; + +int write_op(ast_op_t* aop, FILE* f, comp_state_t state) { + + switch (aop->type) { + case (AOT_OP): { + token_t op = aop->op; + switch (op.type) { 
+ case (TT_PUSH_INT): { fprintf(f, " mov rax, %zu\n", op.int_v); fprintf(f, " push rax\n"); /* push the literal, same shape as TT_PUSH_BOOL */ + }; break; + case (TT_PUSH_STR): { + fprintf(f, " mov rax, morph_str_%zu\n", dynarray_length(state.strings)); + fprintf(f, " push rax\n"); + fprintf(f, " mov rax, %zu\n", strlen(op.str_v)); + fprintf(f, " push rax\n"); + dynarray_push(state.strings, op.str_v); + }; break; + case (TT_PUSH_CSTR): { + fprintf(f, " mov rax, morph_str_%zu\n", dynarray_length(state.strings)); + fprintf(f, " push rax\n"); + dynarray_push(state.strings, op.str_v); + }; break; + case (TT_PUSH_BOOL): { + fprintf(f, " mov rax, %d\n", (int)op.bool_v); + fprintf(f, " push rax\n"); + }; break; + case (TT_PUSH_CHAR): { + fprintf(f, " mov rax, %d ; '%c'\n", (int)op.char_v, op.char_v); + fprintf(f, " push rax\n"); + }; break; + case (TT_PUSH_FLOAT): { + // TODO: do this + }; break; + case (TT_OP): { + switch (op.op_type) { + case (OP_ADD): { + fprintf(f, " ; -- OP_ADD --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " add rax, rbx\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_SUB): { + fprintf(f, " ; -- OP_SUB --\n"); + fprintf(f, " pop rbx\n"); /* top of stack is the right-hand operand, same order as OP_DIV */ + fprintf(f, " pop rax\n"); + fprintf(f, " sub rax, rbx\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_MUL): { + fprintf(f, " ; -- OP_MUL --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " mul rbx\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_DIV): { + fprintf(f, " ; -- OP_DIV --\n"); + fprintf(f, " xor rdx, rdx\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " div rbx\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_MOD): { + fprintf(f, " ; -- OP_MOD --\n"); + fprintf(f, " xor rdx, rdx\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " div rbx\n"); + fprintf(f, " push rdx\n"); /* div leaves the remainder in rdx; rbx is just the divisor */ + }; break; + case (OP_EQ): { + fprintf(f, " ; -- OP_EQ --\n"); + fprintf(f, " mov rcx, 0\n"); + fprintf(f, " mov rdx, 1\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " cmp rax, rbx\n"); 
+ fprintf(f, " cmove rcx, rdx\n"); + fprintf(f, " push rcx\n"); + }; break; + case (OP_GT): { + fprintf(f, " ; -- OP_GT --\n"); + fprintf(f, " mov rcx, 0\n"); + fprintf(f, " mov rdx, 1\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " cmp rax, rbx\n"); + fprintf(f, " cmovg rcx, rdx\n"); + fprintf(f, " push rcx\n"); + }; break; + case (OP_LT): { + fprintf(f, " ; -- OP_LT --\n"); + fprintf(f, " mov rcx, 0\n"); + fprintf(f, " mov rdx, 1\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " cmp rax, rbx\n"); + fprintf(f, " cmovl rcx, rdx\n"); + fprintf(f, " push rcx\n"); + }; break; + case (OP_GE): { + fprintf(f, " ; -- OP_GE --\n"); + fprintf(f, " mov rcx, 0\n"); + fprintf(f, " mov rdx, 1\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " cmp rax, rbx\n"); + fprintf(f, " cmovge rcx, rdx\n"); + fprintf(f, " push rcx\n"); + }; break; + case (OP_LE): { + fprintf(f, " ; -- OP_LE --\n"); + fprintf(f, " mov rcx, 0\n"); + fprintf(f, " mov rdx, 1\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " cmp rax, rbx\n"); + fprintf(f, " cmovle rcx, rdx\n"); + fprintf(f, " push rcx\n"); + }; break; + case (OP_NE): { + fprintf(f, " ; -- OP_NE --\n"); + fprintf(f, " mov rcx, 1\n"); + fprintf(f, " mov rdx, 0\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " cmp rax, rbx\n"); + fprintf(f, " cmove rcx, rdx\n"); + fprintf(f, " push rcx\n"); + }; break; + case (OP_AND): { + fprintf(f, " ; -- OP_AND --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " test rax, rax\n"); + fprintf(f, " setnz al\n"); + fprintf(f, " test rbx, rbx\n"); + fprintf(f, " setnz bl\n"); + fprintf(f, " and al, bl\n"); + fprintf(f, " movzx rbx, al\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_OR): { + fprintf(f, " ; -- OP_OR --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " test rax, rax\n"); + fprintf(f, " setnz al\n"); + fprintf(f, 
" test rbx, rbx\n"); + fprintf(f, " setnz bl\n"); + fprintf(f, " or al, bl\n"); + fprintf(f, " movzx rbx, al\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_SHR): { + fprintf(f, " ; -- OP_SHR --\n"); + fprintf(f, " pop rcx\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " shr rbx, cl\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_SHL): { + fprintf(f, " ; -- OP_SHL --\n"); + fprintf(f, " pop rcx\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " shl rbx, cl\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_BOR): { + fprintf(f, " ; -- OP_BOR --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " or rbx, rax\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_BAND): { + fprintf(f, " ; -- OP_BAND --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " and rbx, rax\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_NOT): { + fprintf(f, " ; -- OP_NOT --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " test rax, rax\n"); + fprintf(f, " setz al\n"); + fprintf(f, " movzx rbx, al\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_DUP): { + fprintf(f, " ; -- OP_DUP --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " push rax\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_SWAP): { + fprintf(f, " ; -- OP_SWAP --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " push rax\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_DROP): { + fprintf(f, " ; -- OP_DROP --\n"); + fprintf(f, " pop rax\n"); + }; break; + case (OP_OVER): { + fprintf(f, " ; -- OP_OVER --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " push rbx\n"); + fprintf(f, " push rax\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_ROT): { + fprintf(f, " ; -- OP_ROT --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " pop rcx\n"); + fprintf(f, " push rbx\n"); + fprintf(f, " push rax\n"); + fprintf(f, " push rcx\n"); + }; break; + case 
(OP_LOAD8): { + fprintf(f, " ; -- OP_LOAD8 --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " xor rbx, rbx\n"); + fprintf(f, " mov bl, byte [rax]\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_STORE8): { + fprintf(f, " ; -- OP_STORE8 --\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " mov byte [rax], bl\n"); + }; break; + case (OP_LOAD16): { + fprintf(f, " ; -- OP_LOAD16 --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " xor rbx, rbx\n"); + fprintf(f, " mov bx, word [rax]\n"); + fprintf(f, " push rbx\n"); + }; break; + + case (OP_STORE16): { + fprintf(f, " ; -- OP_STORE16 --\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " mov word [rax], bx\n"); + }; break; + case (OP_LOAD32): { + fprintf(f, " ; -- OP_LOAD32 --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " xor rbx, rbx\n"); + fprintf(f, " mov ebx, dword [rax]\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_STORE32): { + fprintf(f, " ; -- OP_STORE32 --\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " mov dword[rax], ebx\n"); + }; break; + case (OP_LOAD64): { + fprintf(f, " ; -- OP_LOAD64 --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " xor rbx, rbx\n"); + fprintf(f, " mov rbx, qword [rax]\n"); + fprintf(f, " push rbx\n"); + }; break; + case (OP_STORE64): { + fprintf(f, " ; -- OP_STORE64 --\n"); + fprintf(f, " pop rbx\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " mov qword [rax], rbx\n"); + }; break; + case (OP_SYSCALL0): { + fprintf(f, " ; -- OP_SYSCALL0 --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " syscall\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_SYSCALL1): { + fprintf(f, " ; -- OP_SYSCALL1 --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rdi\n"); + /* one argument only (rdi): popping r9 as well would eat an unrelated stack value */ + fprintf(f, " syscall\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_SYSCALL2): { + fprintf(f, " ; -- OP_SYSCALL2 --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rdi\n"); + fprintf(f, " pop 
rsi\n"); + fprintf(f, " syscall\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_SYSCALL3): { + fprintf(f, " ; -- OP_SYSCALL3 --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rdi\n"); + fprintf(f, " pop rsi\n"); + fprintf(f, " pop rdx\n"); + fprintf(f, " syscall\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_SYSCALL4): { + fprintf(f, " ; -- OP_SYSCALL4 --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rdi\n"); + fprintf(f, " pop rsi\n"); + fprintf(f, " pop rdx\n"); + fprintf(f, " pop r10\n"); + fprintf(f, " syscall\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_SYSCALL5): { + fprintf(f, " ; -- OP_SYSCALL5 --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rdi\n"); + fprintf(f, " pop rsi\n"); + fprintf(f, " pop rdx\n"); + fprintf(f, " pop r10\n"); + fprintf(f, " pop r8\n"); + fprintf(f, " syscall\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_SYSCALL6): { + fprintf(f, " ; -- OP_SYSCALL6 --\n"); + fprintf(f, " pop rax\n"); + fprintf(f, " pop rdi\n"); + fprintf(f, " pop rsi\n"); + fprintf(f, " pop rdx\n"); + fprintf(f, " pop r10\n"); + fprintf(f, " pop r8\n"); + fprintf(f, " pop r9\n"); + fprintf(f, " syscall\n"); + fprintf(f, " push rax\n"); + }; break; + case (OP_ARGC): { + }; break; + case (OP_ARGV): { + }; break; + case (OP_HERE): { + char* pos = malloc(1024*2); if (!pos) { return 1; } + snprintf(pos, 2048, "%s:%d:%d", op.loc.file, op.loc.line, op.loc.col); + fprintf(f, " mov rax, morph_str_%zu\n", dynarray_length(state.strings)); + fprintf(f, " push rax\n"); + fprintf(f, " mov rax, %zu\n", strlen(pos)); + fprintf(f, " push rax\n"); + dynarray_push(state.strings, pos); /* store the formatted location, not op.str_v; the string table now holds pos */ + }; break; + case (OP_PRINT): { + }; break; + } + }; break; + case (TT_KW): + case (TT_IDENT): { + // unreachable + } break; + } + } break; /* AOT_OP is done; previously fell through into AOT_IF */ + case (AOT_IF): { + }; break; + case (AOT_WHILE): { + }; break; + case (AOT_USE_CONST): { + }; break; + case (AOT_USE_MEMORY): { + }; break; + case (AOT_CALL_FUNC): { + + }; break; + } + return 0; +} + +int 
compile_x86_64_linux_nasm(args_t* args, program_t* prog) { /* Writes the NASM listing for prog to args->asm_file; returns 0 on success, 1 on failure. */ + FILE* f = fopen(args->asm_file, "w"); if (!f) { perror(args->asm_file); return 1; } + comp_state_t state = {0}; + state.strings = dynarray_create(char*); + + fprintf(f, "bits 64\n"); + fprintf(f, "section .text\n"); + fprintf(f, "global _start\n"); + fprintf(f, "_start:\n"); + fprintf(f, " call morph_f_main\n"); + fprintf(f, " mov rax, 60\n"); + fprintf(f, " mov rdi, 0\n"); + fprintf(f, " syscall\n"); + + for (int i = 0; i < dynarray_length(prog->funcs); i++) { + function_t func = prog->funcs[i]; + + fprintf(f, "morph_f_%s:\n", func.name); + for (int y = 0; y < dynarray_length(func.body); y++) { + ast_op_t aop = func.body[y]; + if (write_op(&aop, f, state)) { /* NOTE(review): state is passed by value -- confirm a reallocating push inside write_op cannot strand state.strings */ + fclose(f); return 1; + } + } + fprintf(f, " ret\n"); + } + + + fprintf(f, "section .bss\n"); + for (int i = 0; i < dynarray_length(prog->memories); i++) { + memory_t mem = prog->memories[i]; + fprintf(f, "morph_memory_%d: resb %zu ; Memory %s\n", i, mem.size, mem.name); + } + fprintf(f, "section .rodata\n"); + + for (int i = 0; i < dynarray_length(state.strings); i++) { + fprintf(f, "morph_str_%d: db \"%s\"\n", i, state.strings[i]); + } + for (int i = 0; i < dynarray_length(prog->const_vars); i++) { + const_t* v = &prog->const_vars[i]; + fprintf(f, "morph_const_%d: ", i); + switch (v->val.type) { + case (TT_PUSH_INT): { + fprintf(f, "dq %zu\n", v->val.int_v); + }; break; + case (TT_PUSH_CSTR): + case (TT_PUSH_STR): { + fprintf(f, "db \"%s\"\n", v->val.str_v); /* quoted, like the morph_str_N entries above */ + }; break; + case (TT_PUSH_CHAR): + fprintf(f, "db %d\n", (int)v->val.char_v); break; /* numeric byte; a bare character would be read as a symbol */ + case (TT_PUSH_BOOL): + fprintf(f, "db %d\n", (int)v->val.bool_v); break; + default: fprintf(f, "\n"); break; /* terminate the label line for value types not emitted yet */ + } + } + if (fclose(f) != 0) { return 1; } return 0; +} diff --git a/src/dynarray.c b/src/dynarray.c index 0d7ea77..6da1564 100644 --- a/src/dynarray.c +++ b/src/dynarray.c @@ -1,5 +1,6 @@ #include "dynarray.h" +#include #include #include @@ -25,6 +26,8 @@ void* _dynarray_create(size_t init_cap, size_t stride) { size_t header_size = DYNARRAY_FIELDS * sizeof(size_t); size_t arr_size = init_cap * stride; size_t* arr = 
(size_t*)malloc(header_size + arr_size); + assert(arr && "Could not create da"); + da_debug_t* arr_dbg = (da_debug_t*)((size_t*)arr); arr[CAPACITY] = init_cap; arr[LENGTH] = 0; arr[STRIDE] = stride; @@ -48,19 +51,24 @@ void _dynarray_field_set(void* arr, size_t field, size_t value) { // Allocates a new dynarray with twice the size of the one passed in, and // retaining the values that the original stored. void* _dynarray_resize(void* arr) { + da_debug_t* dbg = (da_debug_t*)((size_t*)arr-3); + size_t new_cap = DYNARRAY_RESIZE_FACTOR * dynarray_capacity(arr); void* temp = _dynarray_create( // Allocate new dynarray w/ more space. - DYNARRAY_RESIZE_FACTOR * dynarray_capacity(arr), dynarray_stride(arr)); + new_cap, dynarray_stride(arr)); memcpy(temp, arr, dynarray_length(arr) * dynarray_stride(arr)); // Copy erythin' over. _dynarray_field_set(temp, LENGTH, dynarray_length(arr)); // Set `length` field. + da_debug_t* dbg2 = (da_debug_t*)((size_t*)temp-3); _dynarray_destroy(arr); // Free previous array. return temp; } void* _dynarray_push(void* arr, void* xptr) { + da_debug_t* dbg = (da_debug_t*)((size_t*)arr-3); if (dynarray_length(arr) >= dynarray_capacity(arr)) { arr = _dynarray_resize(arr); + dbg = (da_debug_t*)((size_t*)arr-3); } memcpy(arr + dynarray_length(arr) * dynarray_stride(arr), xptr, dynarray_stride(arr)); @@ -70,6 +78,7 @@ void* _dynarray_push(void* arr, void* xptr) { // Removes the last element in the array, but copies it to `*dest` first. 
int _dynarray_pop(void* arr, void* dest) { + da_debug_t* dbg = (da_debug_t*)((size_t*)arr-3); if (dynarray_length(arr) < 1) { return 1; } @@ -82,15 +91,24 @@ int _dynarray_pop(void* arr, void* dest) { } // copies everything in reverse into a temporary array, and then memcpy's everything into the old array; -void _dynarray_reverse(void* arr) { - size_t stride = dynarray_stride(arr); - size_t length = dynarray_length(arr); - void* temp_arr = _dynarray_create(length, stride); +// Now reverses in place: swaps elements pairwise from both ends through one scratch element. +// Returns `arr`, or NULL when `arr` is NULL. - void* temp_item = malloc(stride); - for (int i = dynarray_length(arr); i > 0; i--) { - dynarray_pop(arr, temp_item); - memcpy(arr + i * stride, temp_item, stride); +void* _dynarray_reverse(void* arr) { + if (!arr) { + return NULL; } - memcpy(arr, temp_arr, stride * length); + + size_t len = dynarray_length(arr); + size_t stride = dynarray_stride(arr); + + void* tmp = malloc(stride); + for (size_t i = 0, j = len ? len - 1 : 0; i < j; ++i, --j) { + memcpy(tmp, arr + i * stride, stride); + memcpy(arr + i * stride, arr + j * stride, stride); + memcpy(arr + j * stride, tmp, stride); + } + + free(tmp); + return arr; } diff --git a/src/include/argparse.h b/src/include/argparse.h new file mode 100644 index 0000000..35cc94b --- /dev/null +++ b/src/include/argparse.h @@ -0,0 +1,23 @@ +#ifndef _H_MORPH_ARGPARSE +#define _H_MORPH_ARGPARSE + +typedef enum comp_mode_e { + CM_COMPILE_AND_LINK = 0, // default + CM_COMPILE, +} comp_mode_t; + +typedef struct args_s { + comp_mode_t comp_mode; + char* target; + char** include_paths; + char* output_file; + char** input_files; + + // autogen + char* asm_file; + char* obj_file; +} args_t; + +args_t* parse_args(int argc, const char** argv); + +#endif // !_H_MORPH_ARGPARSE diff --git a/src/include/compiler.h b/src/include/compiler.h new file mode 100644 index 0000000..07ab636 --- /dev/null +++ b/src/include/compiler.h @@ -0,0 +1,13 @@ +#include "argparse.h" +#include "parser/ast.h" +#ifndef _H_MORPH_COMPILER +#define _H_MORPH_COMPILER +#include + +extern const char* DEFAULT_TARGET; 
+extern const char* AVAILABLE_TARGETS[]; +extern size_t AVAILABLE_TARGET_COUNT; + +int compile(args_t* args, program_t* prog); + +#endif // !_H_MORPH_COMPILER diff --git a/src/include/dynarray.h b/src/include/dynarray.h index 6f4944d..25c9767 100644 --- a/src/include/dynarray.h +++ b/src/include/dynarray.h @@ -3,6 +3,7 @@ #ifndef _H_DYNARRAY #define _H_DYNARRAY +#include #include // malloc #include // memcpy @@ -15,6 +16,13 @@ enum { CAPACITY, LENGTH, STRIDE, DYNARRAY_FIELDS }; +typedef struct da_debug_s { + size_t capacity; + size_t length; + size_t stride; + void* data; +} da_debug_t; + void* _dynarray_create(size_t length, size_t stride); void _dynarray_destroy(void* arr); @@ -26,7 +34,7 @@ void* _dynarray_resize(void* arr); void* _dynarray_push(void* arr, void* xptr); // OK = 0; ERR = 1 int _dynarray_pop(void* arr, void* dest); -void _dynarray_reverse(void* arr); +void* _dynarray_reverse(void* arr); #define DYNARRAY_DEFAULT_CAP 1 #define DYNARRAY_RESIZE_FACTOR 2 diff --git a/src/include/parser/ast.h b/src/include/parser/ast.h index d375d10..809071e 100644 --- a/src/include/parser/ast.h +++ b/src/include/parser/ast.h @@ -4,14 +4,11 @@ #include "loc.h" #include "token.h" #include +typedef struct ast_op_s ast_op_t; typedef struct const_s { const char* name; - - union { - const char* str_v; - size_t int_v; - }; + token_t val; } const_t; typedef struct memory_s { @@ -20,26 +17,22 @@ typedef struct memory_s { } memory_t; typedef struct ast_if_stat_s { - token_t* condition; - token_t* body; + ast_op_t* condition; + ast_op_t* body; bool is_elseif; union { struct ast_if_stat_s* elseif; - token_t else_body; + ast_op_t* else_body; }; } ast_if_stat_t; typedef struct ast_while_stat_s { - token_t* condition; - token_t* body; + ast_op_t* condition; + ast_op_t* body; } ast_while_stat_t; -typedef enum ast_op_type_e { - AOT_OP, - AOT_IF, - AOT_WHILE, -} ast_op_type_t; +typedef enum ast_op_type_e { AOT_OP, AOT_IF, AOT_WHILE, AOT_USE_CONST, AOT_USE_MEMORY, AOT_CALL_FUNC } 
ast_op_type_t; typedef struct ast_op_s { ast_op_type_t type; @@ -49,6 +42,8 @@ typedef struct ast_op_s { token_t op; ast_if_stat_t if_stat; ast_while_stat_t while_stat; + size_t id; + char* func_name; }; } ast_op_t; @@ -57,13 +52,13 @@ typedef struct function_s { const char* name; const char** args; const char** return_args; - ast_op_type_t* body; + ast_op_t* body; } function_t; typedef struct program_s { - const_t const_vars; - function_t funcs; - memory_t memories; + const_t* const_vars; + function_t* funcs; + memory_t* memories; } program_t; #endif // !_H_MORPH_AST diff --git a/src/include/parser/parser.h b/src/include/parser/parser.h index d818b69..7623cab 100644 --- a/src/include/parser/parser.h +++ b/src/include/parser/parser.h @@ -3,7 +3,7 @@ #include #include -program_t parse(const token_t* tokens); +program_t* parse(token_t* tokens); typedef struct parser_state_s { program_t prog; diff --git a/src/include/parser/precomp.h b/src/include/parser/precomp.h index 671d1ef..022f544 100644 --- a/src/include/parser/precomp.h +++ b/src/include/parser/precomp.h @@ -1,6 +1,7 @@ #ifndef _H_MORPH_PARSER_PRECOMP #define _H_MORPH_PARSER_PRECOMP +#include #include -int compile_value(token_t* tokens); +int compile_value(parser_state_t* state, token_t* tokens, token_t** tokens_out); #endif // _H_MORPH_PARSER_PRECOMP diff --git a/src/include/path_utils.h b/src/include/path_utils.h new file mode 100644 index 0000000..1ef35b2 --- /dev/null +++ b/src/include/path_utils.h @@ -0,0 +1,6 @@ +#ifndef _H_MORPH_PATH_UTILS +#define _H_MORPH_PATH_UTILS + +char* replace_extension(char* path, char* ext); + +#endif // !_H_MORPH_PATH_UTILS diff --git a/src/include/prettyprint.h b/src/include/prettyprint.h new file mode 100644 index 0000000..51ad8ec --- /dev/null +++ b/src/include/prettyprint.h @@ -0,0 +1,21 @@ +#ifndef _H_MORPH_PRETTYPRINT +#define _H_MORPH_PRETTYPRINT + +#include +#include + +enum { + PPM_NONE = 0, + +}; + +typedef struct { + uint8_t magic[2]; +} pp_stub_t; + +#define 
PP_MAGIC_PRE 0x4F +#define magic(post) ((PP_MAGIC_PRE << 8) | (post)) /* prefix in the high byte, post in the low byte; '&' always produced 0 for a byte-sized post */ +#define pretty_print(structure) _pretty_print(0, (pp_stub_t*)(structure)) /* no trailing ';' so the call site supplies it */ +void _pretty_print(size_t indent, pp_stub_t* structure); + +#endif // !_H_MORPH_PRETTYPRINT diff --git a/src/include/token.h b/src/include/token.h index 691f714..fd52368 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -6,12 +6,10 @@ #include typedef enum token_type_e { - TT_NONE = 0, TT_KW, TT_OP, TT_PUSH_STR, TT_PUSH_CSTR, - TT_PUSH_MEM, TT_PUSH_CHAR, TT_PUSH_INT, TT_PUSH_FLOAT, @@ -21,8 +19,7 @@ typedef enum token_type_e { typedef enum kw_type_e { // Include will be tokeniser level - KW_NONE = 0, - KW_FN, + KW_FN = 1, KW_DO, KW_END, KW_WITH, diff --git a/src/main.c b/src/main.c index e904b99..43b7f9f 100644 --- a/src/main.c +++ b/src/main.c @@ -1,23 +1,29 @@ +#include "argparse.h" +#include "logger.h" +#include "parser/ast.h" +#include "parser/parser.h" #include "dynarray.h" +#include "prettyprint.h" #include "token.h" #include #include #include +#include int main(int argc, const char** argv) { - if (argc < 2) { - printf("Usage: %s [source]\n", argv[0]); + args_t* args = parse_args(argc, argv); + if (!args) { return 1; } - char* str = read_to_string(argv[1]); - token_t* tokens = tokenise_string((char*)argv[1], str); + + char* str = read_to_string(args->input_files[0]); + token_t* tokens = tokenise_string((char*)args->input_files[0], str); int token_count = dynarray_length(tokens); for (int i = 0; i < token_count; i++) { + break; token_t tok = tokens[i]; switch (tok.type) { - case (TT_NONE): - break; case (TT_KW): { printf("TOK: KW %s\n", KW_LIST[tok.kw_type]); } break; @@ -30,10 +36,15 @@ int main(int argc, const char** argv) { case (TT_PUSH_INT): { printf("TOK: PUSH_INT \"%zu\"\n", tok.int_v); } break; + case (TT_PUSH_STR): { + printf("TOK: PUSH_STR \"%s\"\n", tok.str_v); + } break; default: break; } } - + program_t* ast = parse(tokens); if (!ast) { return 1; } /* parse() returns NULL on a parse error */ + pretty_print(tokens); + if (compile(args, ast) != 0) { return 1; } return 0; } diff --git 
a/src/parser/parser.c b/src/parser/parser.c index dbc8396..910e5ec 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -2,6 +2,7 @@ #include "tokeniser.h" #include "util.h" #include +#include #include #include #include @@ -12,24 +13,63 @@ #include int _parse(parser_state_t* state); +ast_op_t* parse_fnc_body(parser_state_t* state); -program_t parse(const token_t* tokens) { +size_t find_mem_id(parser_state_t* state, const char* name) { + for (int i = 0; i < dynarray_length(state->prog.memories); i++) { + if (strcmp(state->prog.memories[i].name, name) == 0) { + return i; + } + } + return -1; +} + +size_t find_const_id(parser_state_t* state, const char* name) { + for (int i = 0; i < dynarray_length(state->prog.const_vars); i++) { + if (strcmp(state->prog.const_vars[i].name, name) == 0) { + return i; + } + } + return -1; +} + +char* find_func(parser_state_t* state, const char* name) { + for (int i = 0; i < dynarray_length(state->prog.funcs); i++) { + if (strcmp(state->prog.funcs[i].name, name) == 0) { + return (char*)name; + } + } + return NULL; +} + +program_t* parse(token_t* tokens) { // reverses tokens so its way easier to parse; - dynarray_reverse((void*)tokens); - parser_state_t state = {0}; - state.tokens = (token_t*)tokens; - (void)tokens; - _parse(&state); - return state.prog; + log_debug(NULL, "%zu tokens", dynarray_length(tokens)); + tokens = dynarray_reverse((void*)tokens); + static parser_state_t state = {0}; + log_debug(NULL, "%zu tokens", dynarray_length(tokens)); + state.tokens = tokens; + state.prog.const_vars = dynarray_create(const_t); + state.prog.memories = dynarray_create(memory_t); + state.prog.funcs = dynarray_create(function_t); + + if (_parse(&state) != 0) { + return NULL; + } + return &state.prog; } int _parse(parser_state_t* state) { - while (dynarray_pop(state->tokens, &state->curr_tok) == 0) { + log_debug(&state->curr_tok.loc, "Starting parsing (%zu tokens)", dynarray_length(state->tokens)); + while (dynarray_length(state->tokens)) 
{ + dynarray_pop(state->tokens, &state->curr_tok); + //log_debug(&state->curr_tok.loc, "Found Token"); if (state->curr_tok.type != TT_KW) { const char* dbg_s = get_tok_str_dbg(&state->curr_tok); log_error(&state->curr_tok.loc, "Invalid word, expected Keyword, got %s.", dbg_s); free((void*)dbg_s); } + //log_debug(&state->curr_tok.loc, "Its a KW"); switch (state->curr_tok.kw_type) { case (KW_INCLUDE): { expect_token_type(state, TT_PUSH_STR); @@ -46,15 +86,15 @@ int _parse(parser_state_t* state) { } }; break; case (KW_CONST): { - // TODO: Implement compile time calculation of the value token_t* tokens = dynarray_create(token_t); - + token_t name = *expect_token_type(state, TT_IDENT); while (test_token_type_ex(state, TT_KW, KW_END) == NULL) { // clang-format off if ( test_token_type(state, TT_PUSH_STR) || test_token_type(state, TT_PUSH_CSTR) || test_token_type(state, TT_PUSH_CHAR) || test_token_type(state, TT_PUSH_INT) || - test_token_type(state, TT_PUSH_FLOAT) || test_token_type(state, TT_PUSH_MEM) + test_token_type(state, TT_PUSH_FLOAT) || + test_token_type(state, TT_OP) || test_token_type(state, TT_IDENT) ) { // clang-format on dynarray_pop(state->tokens, &state->curr_tok); dynarray_push(tokens, state->curr_tok); @@ -63,13 +103,67 @@ int _parse(parser_state_t* state) { if (!expect_token_type_ex(state, TT_KW, KW_END)) { return 1; } - + token_t* tokens_out = NULL; + if (compile_value(state, tokens, &tokens_out) != 0) { + return 1; + } + dynarray_destroy(tokens); + if (dynarray_length(tokens_out) != 1) { + log_error(&name.loc, "Constants can only have one value after calculation"); + return 1; + } + token_t value; + dynarray_pop(tokens_out, &value); + const_t c = {.name = name.str_v, .val = value}; + dynarray_push(state->prog.const_vars, c); }; break; case (KW_MEMORY): { - + token_t* tokens = dynarray_create(token_t); + token_t name = *expect_token_type(state, TT_IDENT); + while (test_token_type_ex(state, TT_KW, KW_END) == NULL) { + // clang-format off + if 
(test_token_type(state, TT_PUSH_INT) || test_token_type(state, TT_OP) || test_token_type(state, TT_IDENT) ) { // clang-format on + dynarray_pop(state->tokens, &state->curr_tok); + dynarray_push(tokens, state->curr_tok); + } + } + if (!expect_token_type_ex(state, TT_KW, KW_END)) { + return 1; + } + token_t* tokens_out = NULL; + if (compile_value(state, tokens, &tokens_out) != 0) { + return 1; + } + dynarray_destroy(tokens); + if (dynarray_length(tokens_out) != 1 || tokens_out[0].type != TT_PUSH_INT) { + log_error(&name.loc, "Memories can only have one Int value after calculation"); + return 1; + } + token_t* value; + dynarray_pop(tokens_out, value); + memory_t c = {.name = name.str_v, .size = value->int_v}; + dynarray_push(state->prog.memories, c); }; break; case (KW_FN): { + token_t name = *expect_token_type(state, TT_IDENT); + const char** in_args = dynarray_create(const char*); + const char** out_args = dynarray_create(const char*); + token_t tmp_tok = {0}; + expect_token_type_ex(state, TT_KW, KW_WITH); + while (test_token_type_ex(state, TT_KW, KW_RETURNS) == NULL) { + dynarray_pop(state->tokens, &tmp_tok); + dynarray_push(in_args, tmp_tok.str_v); + } + expect_token_type_ex(state, TT_KW, KW_RETURNS); + while (test_token_type_ex(state, TT_KW, KW_DO) == NULL) { + dynarray_pop(state->tokens, &tmp_tok); + dynarray_push(in_args, tmp_tok.str_v); + } + expect_token_type_ex(state, TT_KW, KW_DO); + ast_op_t* body = parse_fnc_body(state); + function_t fnc = {.loc = name.loc, .args = in_args, .return_args = out_args, .body=body, .name=name.str_v}; + dynarray_push(state->prog.funcs, fnc); }; break; default: assert(true && "TODO: parse all kw's"); @@ -77,3 +171,136 @@ int _parse(parser_state_t* state) { } return 0; } + +int parse_item(parser_state_t* state, ast_op_t** body) { + dynarray_pop(state->tokens, &state->curr_tok); + switch (state->curr_tok.type) { + case (TT_IDENT): { + size_t id = 0; + if ((id = find_const_id(state, state->curr_tok.str_v)) != -1) { + ast_op_t v = 
{.loc = state->curr_tok.loc, .type = AOT_USE_CONST, .id = id}; + dynarray_push(*body, v); + } else if ((id = find_mem_id(state, state->curr_tok.str_v)) != -1) { + ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_USE_MEMORY, .id = id}; + dynarray_push(*body, v); + } else if (find_func(state, state->curr_tok.str_v)) { + ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_CALL_FUNC, .func_name = (char*)state->curr_tok.str_v}; + dynarray_push(*body, v); + } else { + log_error(&state->curr_tok.loc, "Unknown label '%s'", state->curr_tok.str_v); + return 1; + } + }; break; + case (TT_PUSH_BOOL): + case (TT_PUSH_INT): + case (TT_PUSH_FLOAT): + case (TT_PUSH_CHAR): + case (TT_PUSH_STR): + case (TT_PUSH_CSTR): + case (TT_OP): { + ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_OP, .op = state->curr_tok}; + dynarray_push(*body, v); + }; break; + case (TT_KW): { + switch (state->curr_tok.kw_type) { + case (KW_WHILE): { + ast_op_t* condition = dynarray_create(ast_op_t); + ast_op_t* whilebody = dynarray_create(ast_op_t); + while (test_token_type_ex(state, TT_KW, KW_DO) == NULL) { + dynarray_pop(state->tokens, &state->curr_tok); + if (parse_item(state, &condition)) { + return 1; + } + } + assert(expect_token_type_ex(state, TT_KW, KW_DO)); + while (test_token_type_ex(state, TT_KW, KW_END) == NULL) { + dynarray_pop(state->tokens, &state->curr_tok); + + if (parse_item(state, &whilebody)) { + return 1; + } + } + assert(expect_token_type_ex(state, TT_KW, KW_END)); + ast_while_stat_t ws = {.body = whilebody, .condition = condition}; + ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_WHILE, .while_stat = ws}; + dynarray_push(body, v); + }; break; + case (KW_IF): { + ast_op_t* condition = dynarray_create(ast_op_t); + ast_op_t* ifbody = dynarray_create(ast_op_t); + ast_op_t* else_body = dynarray_create(ast_op_t); + bool is_else = false; + bool is_elseif = false; + while (test_token_type_ex(state, TT_KW, KW_DO) == NULL) { + dynarray_pop(state->tokens, &state->curr_tok); + 
if (parse_item(state, &condition)) { + return 1; + } + } + assert(expect_token_type_ex(state, TT_KW, KW_DO)); + while (test_token_type_ex(state, TT_KW, KW_END) == NULL) { + dynarray_pop(state->tokens, &state->curr_tok); + + if (parse_item(state, &ifbody)) { + return 1; + } + if (test_token_type_ex(state, TT_KW, KW_ELSE)) { + assert(expect_token_type_ex(state, TT_KW, KW_ELSE)); + is_else = true; + if (test_token_type_ex(state, TT_KW, KW_IF)) { + is_elseif = true; + is_else = false; + break; + } + } + } + if (is_else) { + while (test_token_type_ex(state, TT_KW, KW_END) == NULL) { + dynarray_pop(state->tokens, &state->curr_tok); + if (parse_item(state, &else_body)) { + return 1; + } + } + assert(expect_token_type_ex(state, TT_KW, KW_END)); + ast_if_stat_t is = {.body = ifbody, .is_elseif = false, .condition = condition, .else_body = else_body}; + ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_IF, .if_stat = is}; + dynarray_push(body, v); + } else if (is_elseif) { + ast_op_t* tmp_body = dynarray_create(ast_op_t); + parse_item(state, &tmp_body); + + ast_if_stat_t* elif_branch = malloc(sizeof(ast_if_stat_t)); + memcpy(elif_branch, &tmp_body[0].if_stat, sizeof(ast_if_stat_t)); + + ast_if_stat_t is = {.body = ifbody, .is_elseif = true, .condition = condition, .elseif = elif_branch}; + ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_IF, .if_stat = is}; + dynarray_push(body, v); + } else { + assert(expect_token_type_ex(state, TT_KW, KW_END)); + ast_if_stat_t is = {.body = ifbody, .condition = condition}; + ast_op_t v = {.loc = state->curr_tok.loc, .type = AOT_IF, .if_stat = is}; + dynarray_push(body, v); + } + }; break; + + default: { + log_error(&state->curr_tok.loc, "The %s is not supported inside function bodies", get_tok_str_dbg(&state->curr_tok)); + return 1; + }; break; + } + }; break; + } + return 0; +} + +ast_op_t* parse_fnc_body(parser_state_t* state) { + ast_op_t* body = _dynarray_create(2, sizeof(ast_op_t)); + + while (dynarray_length(state->tokens)) 
{ + if (test_token_type_ex(state, TT_KW, KW_END)) break; + if (parse_item(state, &body)) { + return NULL; + } + } + return body; +} diff --git a/src/parser/precomp.c b/src/parser/precomp.c index 8ad8280..9490c6f 100644 --- a/src/parser/precomp.c +++ b/src/parser/precomp.c @@ -13,20 +13,19 @@ #include // compiles values in the token list, will error if invalid tokens -int compile_value(token_t* tokens) { +int compile_value(parser_state_t* state, token_t* tokens, token_t** tokens_out) { // 'f' for float, 'i' for int, 's' for string, 'c' for char char exclusive_type = '\0'; - token_t* tmp_toks = dynarray_create(token_t); + *tokens_out = dynarray_create(token_t); + token_t* tmp_toks = *tokens_out; token_t tok = {0}; - parser_state_t ps = {0}; - ps.tokens = tokens; while (dynarray_pop(tokens, &tok) == 0) { switch (tok.type) { case (TT_PUSH_CSTR): case (TT_PUSH_STR): { // TODO: Add C style string literall joining if (dynarray_length(tokens) != 0) { - log_error(&tok.loc, "Constants do not support C style string literal joining yet"); + log_error(&tok.loc, "Precomp does not support C style string literal joining yet"); return 1; } dynarray_push(tmp_toks, tok); @@ -35,180 +34,150 @@ int compile_value(token_t* tokens) { case (TT_PUSH_FLOAT): case (TT_PUSH_CHAR): case (TT_PUSH_BOOL): - case (TT_PUSH_MEM): { + case (TT_IDENT): { dynarray_push(tmp_toks, tok); }; break; case (TT_KW): { // TODO: Support if statements in constants at compile time against other constants - log_error(&tok.loc, "Keywords are currently not supported in constants"); - return 1; - } - case (TT_IDENT): { - // TODO: Support having other constants as arguments in constants, and memory values for making offsets - log_error(&tok.loc, "Identifiers are currently not supported in constants"); + log_error(&tok.loc, "Keywords are currently not supported in precomp"); return 1; } case (TT_OP): { + token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; + token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; + + 
if (t1->type == TT_IDENT) { + for (int i = 0; i < dynarray_length(state->prog.const_vars); i++) { + if (strcmp(state->prog.const_vars[i].name, t1->str_v) == 0) { + token_t* val = &state->prog.const_vars[i].val; + t1->loc = val->loc; + t1->type = val->type; + t1->int_v = val->int_v; + break; + } + } + } + if (t2->type == TT_IDENT) { + for (int i = 0; i < dynarray_length(state->prog.const_vars); i++) { + if (strcmp(state->prog.const_vars[i].name, t2->str_v) == 0) { + token_t* val = &state->prog.const_vars[i].val; + t2->loc = val->loc; + t2->type = val->type; + t2->int_v = val->int_v; + break; + } + } + } switch (tok.op_type) { case (OP_ADD): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == TT_PUSH_INT && t2->type == TT_PUSH_INT) { size_t val = t1->int_v + t2->int_v; loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else if (t1->type == TT_PUSH_FLOAT && t2->type == TT_PUSH_FLOAT) { double_t val = t1->float_v + t2->float_v; loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_FLOAT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); - - } else if (t1->type == TT_PUSH_MEM && t2->type == TT_PUSH_INT) { - size_t val = t1->int_v + t2->int_v; - loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); - token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be added together", s1, s2); + log_error(&t1->loc, "tmp_toks %s and 
%s cannot be added together", s1, s2); return 1; } }; break; case (OP_SUB): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == TT_PUSH_INT && t2->type == TT_PUSH_INT) { size_t val = t1->int_v - t2->int_v; loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else if (t1->type == TT_PUSH_FLOAT && t2->type == TT_PUSH_FLOAT) { double_t val = t1->float_v - t2->float_v; loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_FLOAT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); - } else if (t1->type == TT_PUSH_MEM && t2->type == TT_PUSH_INT) { - size_t val = t1->int_v - t2->int_v; - loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); - token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be subtracted from eachother", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be subtracted from eachother", s1, s2); return 1; } }; break; case (OP_MUL): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == TT_PUSH_INT && t2->type == TT_PUSH_INT) { size_t val = t1->int_v * t2->int_v; loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, 
tok); } else if (t1->type == TT_PUSH_FLOAT && t2->type == TT_PUSH_FLOAT) { double_t val = t1->float_v * t2->float_v; loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_FLOAT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); - - } else if (t1->type == TT_PUSH_MEM && t2->type == TT_PUSH_INT) { - size_t val = t1->int_v * t2->int_v; - loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); - token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be multiplied together", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be multiplied together", s1, s2); return 1; } }; break; case (OP_DIV): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == TT_PUSH_INT && t2->type == TT_PUSH_INT) { size_t val = t1->int_v / t2->int_v; loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else if (t1->type == TT_PUSH_FLOAT && t2->type == TT_PUSH_FLOAT) { double_t val = t1->float_v / t2->float_v; loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_FLOAT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); - - } else if (t1->type == TT_PUSH_MEM && t2->type == TT_PUSH_INT) { - size_t val = t1->int_v / t2->int_v; - loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); - token_t tok = 
{.type = TT_PUSH_INT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be divided", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be divided", s1, s2); return 1; } }; break; case (OP_MOD): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == TT_PUSH_INT && t2->type == TT_PUSH_INT) { size_t val = t1->int_v % t2->int_v; loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); - } else if (t1->type == TT_PUSH_MEM && t2->type == TT_PUSH_INT) { - size_t val = t1->int_v % t2->int_v; - loc_t loc = t1->loc; - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); - token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be modulo'd together", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be modulo'd together", s1, s2); return 1; } }; break; case (OP_EQ): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { bool val = 0; switch (t1->type) { @@ -228,23 +197,21 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + 
dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_BOOL, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } }; break; case (OP_GT): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { bool val = 0; switch (t1->type) { @@ -260,23 +227,21 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_BOOL, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } }; break; case (OP_LT): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { bool val = 0; switch (t1->type) { @@ -292,23 +257,21 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + 
dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_BOOL, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } }; break; case (OP_GE): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { bool val = 0; switch (t1->type) { @@ -324,23 +287,21 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_BOOL, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } }; break; case (OP_LE): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { bool val = 0; switch (t1->type) { @@ -356,23 +317,21 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + 
dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_BOOL, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } }; break; case (OP_NE): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { bool val = 0; switch (t1->type) { @@ -392,23 +351,21 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_BOOL, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be compared", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be compared", s1, s2); return 1; } }; break; case (OP_SHR): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { size_t val = 0; switch (t1->type) { @@ -418,23 +375,21 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be SHR'd", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be SHR'd", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + 
dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be SHR'd", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be SHR'd", s1, s2); return 1; } }; break; case (OP_SHL): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { size_t val = 0; switch (t1->type) { @@ -444,23 +399,21 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be SHL'd", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be SHL'd", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be SHL'd", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be SHL'd", s1, s2); return 1; } }; break; case (OP_BOR): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { size_t val = 0; switch (t1->type) { @@ -470,23 +423,21 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be BOR'd", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be BOR'd", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); 
token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be BOR'd", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be BOR'd", s1, s2); return 1; } }; break; case (OP_BAND): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { size_t val = 0; switch (t1->type) { @@ -496,40 +447,37 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be BAND'd", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be BAND'd", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_INT, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be BAND'd", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be BAND'd", s1, s2); return 1; } }; break; case (OP_NOT): { - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 1)]; bool val = 0; - switch (t1->type) { + switch (t2->type) { case (TT_PUSH_INT): - val = !t1->bool_v; + val = !t2->bool_v; break; default: - const char* s1 = get_tok_str_dbg(t1); - log_error(&t1->loc, "Tokens %s cannot be negated", s1); + const char* s2 = get_tok_str_dbg(t2); + log_error(&t2->loc, "tmp_toks %s cannot be negated", s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); - token_t tok = {.type = TT_PUSH_BOOL, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, 
NULL); + token_t tok = {.type = TT_PUSH_BOOL, .int_v = val, .loc = t2->loc}; + dynarray_push(tmp_toks, tok); }; break; case (OP_AND): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { bool val = 0; switch (t1->type) { @@ -539,23 +487,21 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be AND'd", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be AND'd", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_BOOL, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be SHL'd", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be SHL'd", s1, s2); return 1; } }; break; case (OP_OR): { - token_t* t2 = &tmp_toks[dynarray_length(tmp_toks - 1)]; - token_t* t1 = &tmp_toks[dynarray_length(tmp_toks - 2)]; if (t1->type == t2->type) { bool val = 0; switch (t1->type) { @@ -565,17 +511,17 @@ int compile_value(token_t* tokens) { default: const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be AND'd", s1, s2); + log_error(&t1->loc, "tmp_toks %s and %s cannot be AND'd", s1, s2); return 1; } - dynarray_pop(tokens, NULL); - dynarray_pop(tokens, NULL); + dynarray_pop(tmp_toks, NULL); + dynarray_pop(tmp_toks, NULL); token_t tok = {.type = TT_PUSH_BOOL, .int_v = val, .loc = t1->loc}; - dynarray_push(tokens, tok); + dynarray_push(tmp_toks, tok); } else { const char* s1 = get_tok_str_dbg(t1); const char* s2 = get_tok_str_dbg(t2); - log_error(&t1->loc, "Tokens %s and %s cannot be SHL'd", s1, s2); + 
log_error(&t1->loc, "tmp_toks %s and %s cannot be SHL'd", s1, s2); return 1; } }; break; @@ -585,8 +531,6 @@ int compile_value(token_t* tokens) { return 1; } } break; - case (TT_NONE): - continue; } } diff --git a/src/parser/tokcmp.c b/src/parser/tokcmp.c index 188cb09..0a3ea1c 100644 --- a/src/parser/tokcmp.c +++ b/src/parser/tokcmp.c @@ -11,12 +11,15 @@ token_t* expect_token_type(parser_state_t* state, token_type_t type) { const char* exoected_type_str = get_tok_type_str_dbg(type); + da_debug_t* dbg = (da_debug_t*)((size_t*)state->tokens-3); if (dynarray_pop(state->tokens, &state->curr_tok) != 0) { log_error(&state->curr_tok.loc, "Invalid word, expected %s, got nothing.", exoected_type_str); return NULL; } + + const char* s1 = get_tok_str_dbg(&state->curr_tok); + // log_debug(&state->curr_tok.loc, "Found %s", s1); if (state->curr_tok.type != type) { - const char* s1 = get_tok_str_dbg(&state->curr_tok); log_error(&state->curr_tok.loc, "Invalid word, expected %s, got %s.", exoected_type_str, s1); return NULL; } @@ -63,7 +66,7 @@ token_t* expect_token_type_ex(parser_state_t* state, token_type_t type, int inne token_t* test_token_type_ex(parser_state_t* state, token_type_t tok_type, int gen_type) { const char* exoected_type_str = get_tok_type_str_dbg(state->curr_tok.type); - token_t* token = &state->tokens[dynarray_length(state->tokens - 1)]; + token_t* token = &state->tokens[dynarray_length(state->tokens) - 1]; // NOTE: tests inner type in a generic way if (token->type != tok_type || ((int)token->kw_type) != gen_type) { return NULL; @@ -73,7 +76,7 @@ token_t* test_token_type_ex(parser_state_t* state, token_type_t tok_type, int ge token_t* test_token_type(parser_state_t* state, token_type_t tok_type) { const char* exoected_type_str = get_tok_type_str_dbg(state->curr_tok.type); - token_t* token = &state->tokens[dynarray_length(state->tokens - 1)]; + token_t* token = &state->tokens[dynarray_length(state->tokens) - 1]; if (token->type != tok_type) { return NULL; } diff 
--git a/src/path_utils.c b/src/path_utils.c new file mode 100644 index 0000000..5c59c36 --- /dev/null +++ b/src/path_utils.c @@ -0,0 +1,39 @@ +#include +#include +#include + +char* replace_extension(char* path, char* ext) { + const char* last_dot = strrchr(path, '.'); + const char* last_slash = strrchr(path, '/'); +#ifdef _WIN32 + const char* last_backslash = strrchr(path, '\\'); + if (!last_slash || (last_backslash && last_backslash > last_slash)) { + last_slash = last_backslash; + } +#endif + + if (last_dot && (!last_slash || last_dot > last_slash)) { + size_t base_len = last_dot - path; + size_t ext_len = strlen(ext); + char* result = malloc(base_len + 1 + ext_len + 1); + if (!result) { + return NULL; // >.< + } + memcpy(result, path, base_len); + result[base_len] = '\0'; + strcat(result, "."); + strcat(result, ext); + return result; + } + + size_t len = strlen(path); + size_t ext_len = strlen(ext); + char* result = malloc(len + 1 + ext_len + 1); + if (!result) { + return NULL; + } + strcpy(result, path); + strcat(result, "."); + strcat(result, ext); + return result; +} diff --git a/src/prettyprint.c b/src/prettyprint.c new file mode 100644 index 0000000..b9d8679 --- /dev/null +++ b/src/prettyprint.c @@ -0,0 +1,15 @@ + +#include +#include + +void _pretty_print_struct(size_t indent, pp_stub_t* structure); +void _pretty_print_string(size_t indent, char* str); +void _pretty_print_number(size_t indent, void* num, const char* fmt); + +void _pretty_print(size_t indent, pp_stub_t* structure) { + if (structure->magic[0] != PP_MAGIC_PRE) { + printf("(struct (unknown)*)(%p)\n", structure); + return; + } + switch (structure->magic[1]) {} +} diff --git a/src/token.c b/src/token.c index 7c9a005..3284eee 100644 --- a/src/token.c +++ b/src/token.c @@ -78,10 +78,8 @@ const char* get_tok_type_str_dbg(token_type_t tok_t) { case (TT_PUSH_INT): return "Intager"; case (TT_PUSH_STR): return "String"; case (TT_PUSH_CSTR): return "CString"; - case (TT_PUSH_MEM): return "Memory 
address"; case (TT_PUSH_FLOAT): return "Float"; case (TT_PUSH_BOOL): return "Bool"; - case (TT_NONE): assert(true && "Invalid"); } return "Unreachable"; } @@ -90,42 +88,38 @@ const char* get_tok_str_dbg(token_t* tok) { char* buf = (char*)malloc(buf_size); switch (tok->type) { case (TT_KW): { - snprintf(buf, buf_size, "Keyword '%s'", KW_LIST[tok->kw_type]); + snprintf(buf, buf_size, "Keyword('%s')", KW_LIST[tok->kw_type]); }; break; case (TT_OP): { - snprintf(buf, buf_size, "Operator '%s'", OP_LIST[tok->op_type]); + snprintf(buf, buf_size, "Operator('%s')", OP_LIST[tok->op_type]); }; break; case (TT_IDENT): { - snprintf(buf, buf_size, "Identifier '%s'", tok->str_v); + snprintf(buf, buf_size, "Identifier('%s')", tok->str_v); }; break; case (TT_PUSH_CHAR): { - snprintf(buf, buf_size, "'%c'", tok->char_v); + snprintf(buf, buf_size, "Char('%c')", tok->char_v); }; break; case (TT_PUSH_INT): { - snprintf(buf, buf_size, "%zu", tok->int_v); + snprintf(buf, buf_size, "Int(%zu)", tok->int_v); }; break; case (TT_PUSH_STR): { while (strlen(tok->str_v) > buf_size-2) { buf = realloc(buf, buf_size*=2); } - snprintf(buf, buf_size, "\"%s\"", tok->str_v); + snprintf(buf, buf_size, "Str(\"%s\")", tok->str_v); }; break; case (TT_PUSH_CSTR): { while (strlen(tok->str_v) > buf_size-3) { buf = realloc(buf, buf_size*=2); } - snprintf(buf, buf_size, "c\"%s\"", tok->str_v); - }; break; - case (TT_PUSH_MEM): { - snprintf(buf, buf_size, "Memory address label '%s'", tok->str_v); + snprintf(buf, buf_size, "CStr(\"%s\")", tok->str_v); }; break; case (TT_PUSH_FLOAT): { - snprintf(buf, buf_size, "%f", tok->float_v); + snprintf(buf, buf_size, "Float(%f)", tok->float_v); }; break; case (TT_PUSH_BOOL): { - snprintf(buf, buf_size, "%b", tok->bool_v); + snprintf(buf, buf_size, "Bool(%s)", tok->bool_v ? 
"true" : "false"); }; break; - case (TT_NONE): assert(true && "Invalid"); } return buf; } diff --git a/src/tokeniser.c b/src/tokeniser.c index cba0941..270744a 100644 --- a/src/tokeniser.c +++ b/src/tokeniser.c @@ -1,5 +1,5 @@ +#include "logger.h" #include -#include #include #include #include @@ -125,6 +125,7 @@ token_t* tokenise_string(char* file_path, char* str) { token_t tok = { .type = TT_PUSH_CHAR, .str_v = buf, + .loc = loc, }; dynarray_push(tokens, tok); } break; @@ -141,6 +142,7 @@ token_t* tokenise_string(char* file_path, char* str) { } char* buf = malloc(1024 * 4); bool escaped = false; + i+=1; for (int y = 0;;) { if (str[i] == '"' && !escaped) { break; @@ -163,6 +165,7 @@ token_t* tokenise_string(char* file_path, char* str) { token_t tok = { .type = is_cstr ? TT_PUSH_CSTR : TT_PUSH_STR, .str_v = buf, + .loc = loc, }; dynarray_push(tokens, tok); } break; @@ -182,7 +185,6 @@ token_t* tokenise_string(char* file_path, char* str) { i += 1; } buf[buf_counter + 1] = '\0'; - printf("'%s'\n", buf); // PERF: I dont know if this makes it faster or slower, need 2 check buf = realloc(buf, strlen(buf) + 1); @@ -192,6 +194,7 @@ token_t* tokenise_string(char* file_path, char* str) { token_t tok = { .type = TT_PUSH_INT, .int_v = num, + .loc = loc, }; dynarray_push(tokens, tok); goto loop_end; @@ -212,6 +215,7 @@ token_t* tokenise_string(char* file_path, char* str) { token_t tok = { .type = TT_OP, .op_type = (op_type_t)i, + .loc = loc, }; dynarray_push(tokens, tok); goto loop_end; @@ -221,6 +225,7 @@ token_t* tokenise_string(char* file_path, char* str) { token_t tok = { .type = TT_IDENT, .str_v = strdup(buf), + .loc = loc, }; dynarray_push(tokens, tok); loop_end: diff --git a/test.mrph b/test.mrph index 095766f..fe59f47 100644 --- a/test.mrph +++ b/test.mrph @@ -1,7 +1,9 @@ +const test 1 end + fn main with void returns int do 34 35 add __print__ - "hewo world" println + "hewo world" end