From f6060fb14216676533a90a5f7db6021e550d7147 Mon Sep 17 00:00:00 2001
From: MCorange
Date: Wed, 25 Jun 2025 22:09:56 +0300
Subject: [PATCH] Add KW's and OP's

---
Review notes (text between "---" and the diffstat is not part of the commit):
- tokeniser.c: the word terminator was written at buf_counter + 1 instead of
  buf_counter, so it relied on the memset and could land one byte past the
  4 KiB buffer. It is now written at buf_counter and the memset is dropped.
- tokeniser.c: words longer than the buffer overran it. Excess characters are
  now consumed but not stored.
- tokeniser.c: buf leaked whenever a keyword or operator matched, because the
  early break skipped free(buf). All three outcomes now share one free.
- tokeniser.c: the lookup loops reused the name of the outer index i and kept
  scanning after a match. They now use their own names and stop at the first
  match.
- tokeniser.c: the shrink realloc is removed. buf is freed a few lines later
  and identifiers are strdup'ed, so it only added a failure path.
- NOTE(review): this file arrived with its line breaks, indentation and
  #include targets stripped. Line boundaries were rebuilt from the hunk
  counts; indentation, the position of blank context lines and the layout of
  test.mrph are assumptions, and the bare #include lines are left as found.
  The post-image blob id on the tokeniser.c index line predates these edits.

 src/include/token.h |  4 +--
 src/main.c          | 29 +++++++++++++++++++---
 src/token.c         |  4 +++
 src/tokeniser.c     | 59 +++++++++++++++++++++++++++++++++++++++------
 test.mrph           |  2 +-
 5 files changed, 84 insertions(+), 14 deletions(-)

diff --git a/src/include/token.h b/src/include/token.h
index 59523a3..8e1c856 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -5,7 +5,6 @@
 #include
 #include
 
-
 typedef enum token_type_e {
     TT_NONE = 0,
     TT_KW,
@@ -16,6 +15,7 @@ typedef enum token_type_e {
     TT_PUSH_CHAR,
     TT_PUSH_INT,
     TT_PUSH_FLOAT,
+    TT_IDENT
 } token_type_t;
 
 typedef enum kw_type_e {
@@ -117,8 +117,6 @@ typedef struct token_s {
     };
 } token_t;
 
-
-
 extern const char* OP_LIST[];
 extern const char* KW_LIST[];
 #endif // _H_MORPH_TOKEN
diff --git a/src/main.c b/src/main.c
index 7032284..7beaa55 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,13 +1,36 @@
+#include "dynarray.h"
+#include "token.h"
 #include
 #include
 #include
 
-int main(int argc, const char **argv) {
+int main(int argc, const char** argv) {
     if (argc < 2) {
         printf("Usage: %s [source]\n", argv[0]);
         return 1;
     }
     char* str = read_to_string(argv[1]);
-    tokenise_string(argv[1], str);
-    return 0;
+    token_t* tokens = tokenise_string((char*)argv[1], str);
+    int token_count = dynarray_length(tokens);
+
+    for (int i = 0; i < token_count; i++) {
+        token_t tok = tokens[i];
+        switch (tok.type) {
+        case (TT_NONE):
+            break;
+        case (TT_KW): {
+            printf("TOK: KW %s\n", KW_LIST[tok.kw_type]);
+        } break;
+        case (TT_OP): {
+            printf("TOK: OP %s\n", OP_LIST[tok.op_type]);
+        } break;
+        case (TT_IDENT): {
+            printf("TOK: IDENT \"%s\"\n", tok.str_v);
+        } break;
+        default:
+            break;
+        }
+    }
+
+    return 0;
 }
diff --git a/src/token.c b/src/token.c
index cb98f31..4f38069 100644
--- a/src/token.c
+++ b/src/token.c
@@ -1,5 +1,7 @@
 #include
 
+// clang-format off
+
 const char* OP_LIST[] = {
     [OP_ADD] = "add",
     [OP_SUB] = "sub",
@@ -59,3 +61,5 @@ const char* KW_LIST[] = {
     [KW_CONST] = "const",
     [KW_MEMORY] = "memory"
 };
+
+// clang-format on
diff --git a/src/tokeniser.c b/src/tokeniser.c
index ee318c5..3131f61 100644
--- a/src/tokeniser.c
+++ b/src/tokeniser.c
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
@@ -28,16 +29,60 @@ token_t* tokenise_string(char* file_path, char* str) {
                 // TODO: manage memory better
                 // INFO: If you make an ident longer than 4kb i will murder you
                 char* buf = malloc(1024 * 4);
-                int buf_counter = 1;
-                *buf = str[i];
-                while (i < str_len && (str[i + 1] != ' ' || str[i + 1] != '\t' || str[i + 1] != '\n' || str[i + 1] != '\r')) {
-                    i += 1;
+                int buf_counter = 0;
+                while (i < str_len) {
+                    if (str[i] == ' ' || str[i] == '\t' || str[i] == '\r' || str[i] == '\n') {
+                        break;
+                    }
+                    if (buf_counter >= 1024 * 4 - 1) {
+                        // Buffer full: keep consuming the word but drop the excess,
+                        // leaving room for the terminator.
+                        i += 1;
+                        continue;
+                    }
                     buf[buf_counter++] = str[i];
+                    i += 1;
                 }
-                buf = realloc(buf, strlen(buf) + 1);
-                
+                // buf_counter is one past the last stored char.
+                buf[buf_counter] = '\0';
+                printf("'%s'\n", buf);
+
+                // Keywords are tried first, then operators; a word matching
+                // neither becomes an identifier.
+                bool found = false;
+                for (int kw = 1; kw < KW_COUNT__ && !found; kw++) {
+                    if (strcmp(KW_LIST[kw], buf) == 0) {
+                        token_t tok = {
+                            .type = TT_KW,
+                            .kw_type = (kw_type_t)kw,
+                        };
+                        dynarray_push(tokens, tok);
+                        found = true;
+                    }
+                }
+                for (int op = 1; op < OP_COUNT__ && !found; op++) {
+                    if (strcmp(OP_LIST[op], buf) == 0) {
+                        token_t tok = {
+                            .type = TT_OP,
+                            .op_type = (op_type_t)op,
+                        };
+                        dynarray_push(tokens, tok);
+                        found = true;
+                    }
+                }
+                if (!found) {
+                    token_t tok = {
+                        .type = TT_IDENT,
+                        .str_v = strdup(buf),
+                    };
+                    dynarray_push(tokens, tok);
+                }
+
+                // buf is scratch space (identifiers are strdup'ed), so release
+                // it on every path.
+                free(buf);
             }
         }
     }
-    return NULL;
+    return tokens;
 }
diff --git a/test.mrph b/test.mrph
index f0b6a76..45242ec 100644
--- a/test.mrph
+++ b/test.mrph
@@ -1,5 +1,5 @@
-fn main with void returs int do
+fn main with void returns int do
 
     34 35 add
     __print__
 end