From 957a15a3af9e9af73c354c7fca7cc0d08502cd87 Mon Sep 17 00:00:00 2001 From: MCorange Date: Fri, 26 Jul 2024 01:01:10 +0300 Subject: [PATCH] Logger, a little bit of parser --- src/cliargs.c | 2 +- src/include/ast.h | 61 ++++++++++++++++ src/include/cliargs.h | 6 +- src/include/dyn_arr.h | 107 +++++++++++++++++----------- src/include/errors.h | 6 ++ src/include/logger.h | 46 ++++++++++++ src/include/parser.h | 10 +++ src/include/token.h | 12 +++- src/loc.c | 16 +++++ src/logger.c | 44 ++++++++++++ src/main.c | 17 ++--- src/parser.c | 111 +++++++++++++++++++++++++++++ src/token.c | 78 +++++++++++++++++++-- src/tokeniser.c | 159 ++++++++++++++++++++++++++++++++++-------- test.mcl | 2 +- 15 files changed, 586 insertions(+), 91 deletions(-) create mode 100644 src/include/ast.h create mode 100644 src/include/errors.h create mode 100644 src/include/logger.h create mode 100644 src/include/parser.h create mode 100644 src/loc.c create mode 100644 src/logger.c create mode 100644 src/parser.c diff --git a/src/cliargs.c b/src/cliargs.c index 04235c2..0fc81db 100644 --- a/src/cliargs.c +++ b/src/cliargs.c @@ -69,4 +69,4 @@ void print_help(char* progname) { void print_version(char* progname) { printf("%s: The very epic compiler %s\n", progname, MCL_VERSION); exit(0); -} +} \ No newline at end of file diff --git a/src/include/ast.h b/src/include/ast.h new file mode 100644 index 0000000..f463151 --- /dev/null +++ b/src/include/ast.h @@ -0,0 +1,61 @@ + +#ifndef _H_MCL_AST +#define _H_MCL_AST + +#include "loc.h" +#include "dyn_arr.h" + + +DEFINE_DA(ast_node, void*); + + +typedef enum ast_type_e { + AT_BIN_ADD, + AT_BIN_SUB, + AT_BIN_DIV, + AT_BIN_MUL, + AT_UN_DEREF, + AT_UN_XOR, + AT_FUNC, + AT_ASSIGN, + AT_EXPR, +} ast_type_t; + +typedef struct ast_ident_s { + char* name; + loc_t loc; +} ast_ident_t; + +typedef struct type_s { + ast_ident_t name; + struct type_s* inner; +} type_t; + + +typedef struct ast_node_s { + ast_type_t* type; + loc_t loc; +} ast_node_t; + + +typedef struct ast_program_s { + mcl_da_ast_node_t body; + loc_t loc; +} ast_program_t; + +typedef struct ast_func_param_s { + ast_ident_t name; + type_t type; +} ast_func_param_t; + +DEFINE_DA(func_param, ast_func_param_t); + +typedef struct ast_func_decl_s { + ast_type_t* type; + loc_t loc; + ast_ident_t id; + mcl_da_func_param_t params; + mcl_da_ast_node_t body; +} ast_func_decl_t; + +#endif \ No newline at end of file diff --git a/src/include/cliargs.h b/src/include/cliargs.h index aa0ca22..7b1358a 100644 --- a/src/include/cliargs.h +++ b/src/include/cliargs.h @@ -2,15 +2,13 @@ #define _H_MCL_CLIARGS #include -#include -#include +#include #include "dyn_arr.h" #ifndef MCL_VERSION #define MCL_VERSION "0.0.1" #endif -typedef char* string; DEFINE_DA(string, char*) static struct option cli_options[] = { @@ -32,4 +30,4 @@ typedef struct cliargs_s { cliargs_t* parse_cliargs(int argc, char** argv); -#endif +#endif \ No newline at end of file diff --git a/src/include/dyn_arr.h b/src/include/dyn_arr.h index 6355b05..9f2c362 100644 --- a/src/include/dyn_arr.h +++ b/src/include/dyn_arr.h @@ -10,8 +10,11 @@ typedef struct mcl_da_##name##_s { \ size_t capacity; \ } mcl_da_##name##_t; \ type* mcl_da_##name##_pop(mcl_da_##name##_t* da); \ +type* mcl_da_##name##_peek(mcl_da_##name##_t* da); \ +type* mcl_da_##name##_pop_front(mcl_da_##name##_t* da); \ +type* mcl_da_##name##_peek_front(mcl_da_##name##_t* da); \ void mcl_da_##name##_push(mcl_da_##name##_t* da, type item); \ -void mcl_da_##name##_free(mcl_da_##name##_t* da); \ +void mcl_da_##name##_free(mcl_da_##name##_t* da); #define DEFINE_DA_IMPL(name, type) \ type* mcl_da_##name##_pop(mcl_da_##name##_t* da) { \ @@ -38,50 +41,74 @@ void mcl_da_##name##_push(mcl_da_##name##_t* da, type item) { \ } \ da->items[da->count++] = item; \ } \ +type* mcl_da_##name##_pop_front(mcl_da_##name##_t* da) { \ + if (da->capacity <= 0 || da->count <= 0) \ + return NULL; \ + if (da->count < da->capacity / 2) { \ + da->capacity /= 2; \ + da->items = realloc(da->items, \ + da->capacity * sizeof(type)); \ + assert(da->items && "Out of memory"); \ + } \ + type* item = malloc(sizeof(type)); \ + type* temp = malloc(sizeof(type) * da->count); \ + *item = da->items[0]; \ + memcpy(temp, &da->items[1], sizeof(type) * da->count); \ + memcpy(da->items, temp, sizeof(type) * --da->count); \ + free(temp); \ + return item; \ +} \ +type* mcl_da_##name##_peek(mcl_da_##name##_t* da) { \ + if (da->count < 1) return NULL; \ + return &da->items[da->count - 1]; \ +} \ +type* mcl_da_##name##_peek_front(mcl_da_##name##_t* da) { \ + return &da->items[0]; \ +} \ void mc_da_##name##_free(mcl_da_##name##_t* da) { \ free(da->items); \ } -#define DEFINE_PTR_DA(name, type) \ -typedef struct mcl_ptr_da_##name##_s { \ - type** items; \ - size_t count; \ - size_t capacity; \ -} mcl_ptr_da_##name##_t; \ -type* mcl_da_##name##_pop(mcl_da_##name##_t* da); \ -void mcl_da_##name##_push(mcl_da_##name##_t* da, type* item); - - -#define DEFINE_PTR_DA_IMPL(type) \ -type* mcl_ptr_da_##name##_pop(mcl_da_##name##_t* da) { \ - if (da->capacity <= 0 || da->count <= 0) \ - return NULL; \ - if (da->count < da->capacity / 2) { \ - da->capacity /= 2; \ - da->items = realloc(da->items, \ - da->capacity * sizeof(type*)); \ - assert(da->items && "Out of memory"); \ - } \ - return da->items[(da->count--) - 1]; \ -} \ -void mcl_ptr_da_##name##_push(mcl_da_##name##_t* da, type* item) { \ - if (da->capacity <= da->count) { \ - if (da->capacity == 0) { \ - da->capacity = 8; \ - } else { \ - da->capacity *= 2; \ - } \ - da->items = realloc(da->items, \ - da->capacity * sizeof(type*)); \ - assert(da->items && "Out of memory"); \ - } \ - da->items[da->count++] = item; \ -} \ -void mc_ptr_da_##name##_free(mcl_da_##name##_* da) { \ - free(da->items); \ -} +//#define DEFINE_PTR_DA(name, type) \ +//typedef struct mcl_ptr_da_##name##_s { \ +// type** items; \ +// size_t count; \ +// size_t capacity; \ +//} mcl_ptr_da_##name##_t; \ +//type* mcl_da_##name##_pop(mcl_da_##name##_t* da); \ +//void mcl_da_##name##_push(mcl_da_##name##_t* da, type* item); +// +// +//#define DEFINE_PTR_DA_IMPL(type) \ +//type* mcl_ptr_da_##name##_pop(mcl_da_##name##_t* da) { \ +// if (da->capacity <= 0 || da->count <= 0) \ +// return NULL; \ +// if (da->count < da->capacity / 2) { \ +// da->capacity /= 2; \ +// da->items = realloc(da->items, \ +// da->capacity * sizeof(type*)); \ +// assert(da->items && "Out of memory"); \ +// } \ +// return da->items[(da->count--) - 1]; \ +//} \ +//void mcl_ptr_da_##name##_push(mcl_da_##name##_t* da, type* item) { \ +// if (da->capacity <= da->count) { \ +// if (da->capacity == 0) { \ +// da->capacity = 8; \ +// } else { \ +// da->capacity *= 2; \ +// } \ +// da->items = realloc(da->items, \ +// da->capacity * sizeof(type*)); \ +// assert(da->items && "Out of memory"); \ +// } \ +// da->items[da->count++] = item; \ +//} \ +//void mc_ptr_da_##name##_free(mcl_da_##name##_* da) { \ +// free(da->items); \ +//} #define MCL_DA_FOR_IN(type, da, item, body) do { \ type item; \ @@ -91,4 +118,4 @@ void mc_ptr_da_##name##_free(mcl_da_##name##_* da) { \ } \ } while (0) -#endif +#endif \ No newline at end of file diff --git a/src/include/errors.h b/src/include/errors.h new file mode 100644 index 0000000..6d65789 --- /dev/null +++ b/src/include/errors.h @@ -0,0 +1,6 @@ +#ifndef _H_MCL_ERRORS +#define _H_MCL_ERRORS + +const char* FUNC_DECL_EXMPL = "To define a function follow this example: \nmain :: fn(argc: i32, argv: string[]) {\n ...\n}"; + +#endif \ No newline at end of file diff --git a/src/include/logger.h b/src/include/logger.h new file mode 100644 index 0000000..2a64ff0 --- /dev/null +++ b/src/include/logger.h @@ -0,0 +1,46 @@ +#ifndef _H_MCL_LOG +#define _H_MCL_LOG + +#include "loc.h" + +#ifdef _LOG_COLORS + #define RESET "\x1b[0m" + #define BRIGHT "\x1b[1m" + #define DIM "\x1b[2m" + #define UNDERSCORE "\x1b[4m" + #define BLINK "\x1b[5m" + #define REVERSE "\x1b[7m" + #define HIDDEN "\x1b[8m" + #define FG_BLACK "\x1b[30m" + #define FG_RED "\x1b[31m" + #define FG_GREEN "\x1b[32m" + #define FG_YELLOW "\x1b[33m" + #define FG_BLUE "\x1b[34m" + #define FG_MAGENTA "\x1b[35m" + #define FG_CYAN "\x1b[36m" + #define FG_WHITE "\x1b[37m" + #define BG_BLACK "\x1b[40m" + #define BG_RED "\x1b[41m" + #define BG_GREEN "\x1b[42m" + #define BG_YELLOW "\x1b[43m" + #define BG_BLUE "\x1b[44m" + #define BG_MAGENTA "\x1b[45m" + #define BG_CYAN "\x1b[46m" + #define BG_WHITE "\x1b[47m" +#endif + + +typedef enum log_level_e { + ERROR = 0, + WARN, + HELP, + INFO, + DEBUG, +} log_level_t; + +static log_level_t FILTER_LEVEL = INFO; + +void mcl_log(log_level_t level, const char* fmt, ...); +void mcl_log_loc(log_level_t level, loc_t* loc, const char* fmt, ...); + +#endif \ No newline at end of file diff --git a/src/include/parser.h b/src/include/parser.h new file mode 100644 index 0000000..f0f730b --- /dev/null +++ b/src/include/parser.h @@ -0,0 +1,10 @@ +#ifndef _H_MCL_PARSER +#define _H_MCL_PARSER + + +#include "tokeniser.h" +#include "ast.h" + +ast_program_t* parse(mcl_da_token_t* tokens); + +#endif \ No newline at end of file diff --git a/src/include/token.h b/src/include/token.h index a697c19..e8572cc 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -26,6 +26,16 @@ typedef enum token_type_e { TT_EQ, // = TT_LT, // < TT_GT, // > + TT_KW_FN, + TT_KW_RETURN, + TT_KW_IF, + TT_KW_ELSE, + TT_KW_FOR, + TT_KW_WHILE, + TT_KW_STRUCT, + TT_KW_ENUM, + TT_KW_BREAK, + TT_KW_CONTINUE, } token_type_t; @@ -37,4 +47,4 @@ typedef struct token_s { char* token_to_string(token_t* tt); -#endif +#endif \ No newline at end of file diff --git a/src/loc.c b/src/loc.c new file mode 100644 index 0000000..a4a7998 --- /dev/null +++ b/src/loc.c @@ -0,0 +1,16 @@ +#include "loc.h" +#include +#include + +// NEEDS to be freed +char* loc_str(loc_t* loc) { + size_t len = ( (20*2) + // 2 max lens of u64 int as strings + strlen(loc->file) + // file path length + 2 // 2 colons + ); + char* buf = malloc(sizeof(char)*len); + snprintf(buf, len, "%s:%zu:%zu", loc->file, loc->line, loc->col); + return buf; +} + + diff --git a/src/logger.c b/src/logger.c new file mode 100644 index 0000000..73e087f --- /dev/null +++ b/src/logger.c @@ -0,0 +1,44 @@ +#include +#include + +#define _LOG_COLORS +#include "logger.h" +#include "loc.h" + + +void mcl_log(log_level_t level, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + char* prefix; + switch (level) { + case ERROR: prefix = BRIGHT FG_RED "ERROR" RESET; break; + case WARN: prefix = BRIGHT FG_YELLOW "WARN" RESET; break; + case HELP: prefix = BRIGHT FG_CYAN "HELP" RESET; break; + case INFO: prefix = BRIGHT FG_GREEN "INFO" RESET; break; + case DEBUG: prefix = BRIGHT FG_BLUE "DEBUG" RESET; break; + } + printf("%s: ", prefix); + vprintf(fmt, args); + puts(""); + va_end(args); +} + +void mcl_log_loc(log_level_t level, loc_t* loc, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + char* prefix; + switch (level) { + case ERROR: prefix = BRIGHT FG_RED "ERROR" RESET; break; + case WARN: prefix = BRIGHT FG_YELLOW "WARN" RESET; break; + case HELP: prefix = BRIGHT FG_CYAN "HELP" RESET; break; + case INFO: prefix = BRIGHT FG_GREEN "INFO" RESET; break; + case DEBUG: prefix = BRIGHT FG_BLUE "DEBUG" RESET; break; + } + + printf(BRIGHT"%s"RESET":"BRIGHT"%zu"RESET":"BRIGHT"%zu"RESET": ", loc->file, loc->line, loc->col); + + printf("%s: ", prefix); + vprintf(fmt, args); + puts(""); + va_end(args); +} \ No newline at end of file diff --git a/src/main.c b/src/main.c index 777442d..413a761 100644 --- a/src/main.c +++ b/src/main.c @@ -1,21 +1,23 @@ #include #include #include + #include "cliargs.h" #include "dyn_arr.h" #include "token.h" #include "tokeniser.h" +#include "parser.h" +#include "logger.h" int main(int argc, char** argv) { cliargs_t* cliargs = parse_cliargs(argc, argv); - printf("Hewo world :33\n"); - printf("Output file: %s\n", cliargs->output); + mcl_log(DEBUG, "Output file: %s", cliargs->output); MCL_DA_FOR_IN(char*, &cliargs->input, file, { - printf("Input file: %s\n", file); + mcl_log(DEBUG, "Input file: %s", file); tokeniser_t* tokeniser = tokenise(file); if (!tokeniser) { - printf("Failed to tokenise\n"); + mcl_log(ERROR, "Failed to tokenise"); return 1; } MCL_DA_FOR_IN(token_t, &tokeniser->tokens, token, { @@ -25,9 +27,8 @@ int main(int argc, char** argv) { token.loc.col, token_to_string(&token)); }); + //ast_program_t* prog = parse(&tokeniser->tokens); + }); - - - return 0; -} +} \ No newline at end of file diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..450e8d5 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,111 @@ +#include +#include +#include +#include + +#include "parser.h" +#include "ast.h" +#include "token.h" +#include "tokeniser.h" +#include "errors.h" +#include "dyn_arr.h" +#include "logger.h" + +DEFINE_DA_IMPL(ast_node, void*); +DEFINE_DA_IMPL(func_param, ast_func_param_t); + +token_t* expect_token(mcl_da_token_t* tks, token_t t); +token_t* try_token(mcl_da_token_t* tks, token_t t); +token_t* expect_token_err(mcl_da_token_t* tks, token_t t); +ast_func_decl_t* parse_func(mcl_da_token_t* tokens, ast_ident_t name); + + +ast_program_t* parse(mcl_da_token_t* tokens) { + ast_program_t* prog = malloc(sizeof(ast_program_t)); + assert(prog && "Out of mem"); + + token_t* tok = NULL; + while ((tok=mcl_da_token_pop_front(tokens)) != NULL) { + switch(tok->type) { + case TT_IDENT: { + // printf("IDENT: %s\n", tok->text); + if (!expect_token_err(tokens, (token_t){.type=TT_COLON })) return NULL; + if (!expect_token_err(tokens, (token_t){.type=TT_COLON })) return NULL; + token_t* type = mcl_da_token_pop_front(tokens); + + if (type->type == TT_KW_FN) { + ast_ident_t name = { + .name = tok->text, + .loc = tok->loc + }; + mcl_da_ast_node_push(&prog->body, parse_func(tokens, name)); + } else if (type->type == TT_KW_STRUCT) { + + } else if (type->type == TT_KW_ENUM) { + + } + return NULL; + + } break; + } + // mcl_da_ast_node_push(prog->body, ); + } + return prog; +} + + +ast_func_decl_t* parse_func(mcl_da_token_t* tokens, ast_ident_t name) { + ast_func_decl_t* func = malloc(sizeof(ast_func_decl_t)); + // parse args + if (!expect_token_err(tokens, (token_t){.type=TT_PAREN_L})) return NULL; + while (!try_token(tokens, (token_t){.type=TT_PAREN_R})) { + + // Name + token_t* arg_name = expect_token_err(tokens,(token_t){.type=TT_IDENT}); + if (!arg_name) return NULL; + + if (!expect_token_err(tokens, (token_t){.type=TT_COLON})) return NULL; + + // Type + token_t* type = expect_token_err(tokens, (token_t){.type=TT_IDENT}); + if (!type) return NULL; + + ast_func_param_t param = { + .name = { .name = arg_name->text, .loc = arg_name->loc }, + .type = { .name = { .name = type->text, .loc = type->loc}, .inner=NULL}, + }; + mcl_da_func_param_push(&func->params, param); + + if (!try_token(tokens, (token_t){.type=TT_COMMA})) { + if (!expect_token_err(tokens, (token_t){.type=TT_PAREN_R})) return NULL; + } + } + // parse body + return func; +} + +token_t* expect_token(mcl_da_token_t* tks, token_t t) { + token_t* _tok_ = mcl_da_token_pop_front(tks); + if (_tok_->type != t.type) { + return NULL; + } + return _tok_; +} +token_t* try_token(mcl_da_token_t* tks, token_t t) { + token_t* _tok_ = mcl_da_token_peek_front(tks); + if (_tok_->type != t.type) { + return NULL; + } + return _tok_; +} +token_t* expect_token_err(mcl_da_token_t* tks, token_t t) { + token_t* _tok_ = mcl_da_token_pop_front(tks); + if (_tok_->type != t.type) { + mcl_log(ERROR, "Expected %s", token_to_string(&t)); + mcl_log_loc(INFO, &_tok_->loc, "Got %s", token_to_string(_tok_)); + return NULL; + + } + return _tok_; +} + diff --git a/src/token.c b/src/token.c index 3de450a..ceb3a36 100644 --- a/src/token.c +++ b/src/token.c @@ -3,21 +3,29 @@ #include #include "token.h" +char* escape_str(char* str); char* token_to_string(token_t* t) { char* buf = {0}; int len = 0; switch(t->type) { - case TT_IDENT: - return t->text; - case TT_STR: - len = strlen(t->text)+3; + case TT_IDENT: + // printf("IDENT\n"); + len = strlen(t->text)+10; buf = malloc(len); - snprintf(buf, len, "\"%s\"", t->text); + snprintf(buf, len, "IDENT(\"%s\")", escape_str(t->text)); + return buf; + case TT_STR: + // printf("STR\n"); + len = strlen(t->text)+10; + buf = malloc(len); + snprintf(buf, len, "STR(\"%s\")", t->text); + buf = escape_str(t->text); return buf; case TT_CHR: - buf = malloc(4); - snprintf(buf, 4, "'%c\'", *t->text); + // printf("CHR\n"); + buf = malloc(10); + snprintf(buf, 10, "CHAR('%c')", *t->text); return buf; case TT_CURLY_R: return "}"; @@ -57,5 +65,61 @@ char* token_to_string(token_t* t) { return "<"; case TT_GT: return ">"; + case TT_KW_FN: + return "KEYWORD(fn)"; + case TT_KW_RETURN: + return "KEYWORD(return)"; + case TT_KW_IF: + return "KEYWORD(if)"; + case TT_KW_ELSE: + return "KEYWORD(else)"; + case TT_KW_FOR: + return "KEYWORD(for)"; + case TT_KW_WHILE: + return "KEYWORD(while)"; + case TT_KW_STRUCT: + return "KEYWORD(struct)"; + case TT_KW_ENUM: + return "KEYWORD(enum)"; + case TT_KW_BREAK: + return "KEYWORD(break)"; + case TT_KW_CONTINUE: + return "KEYWORD(continue)"; } } + +char* escape_str(char* str) { + // First, determine the length of the new string + size_t new_length = 0; + for (const char* p = str; *p != '\0'; ++p) { + if (*p == '\n' || *p == '\r') { + new_length += 2; // Each '\n' or '\r' becomes two characters: '\\' and 'n' or 'r' + } else { + new_length += 1; + } + } + + // Allocate memory for the new string + char* new_str = (char*)malloc(new_length + 1); // +1 for the null terminator + if (new_str == NULL) { + fprintf(stderr, "Memory allocation failed\n"); + exit(EXIT_FAILURE); + } + + // Replace '\n' with "\\n" and '\r' with "\\r" + char* q = new_str; + for (const char* p = str; *p != '\0'; ++p) { + if (*p == '\n') { + *q++ = '\\'; + *q++ = 'n'; + } else if (*p == '\r') { + *q++ = '\\'; + *q++ = 'r'; + } else { + *q++ = *p; + } + } + *q = '\0'; // Null-terminate the new string + + return new_str; +} \ No newline at end of file diff --git a/src/tokeniser.c b/src/tokeniser.c index 802dc63..40d1ece 100644 --- a/src/tokeniser.c +++ b/src/tokeniser.c @@ -4,15 +4,34 @@ #include "tokeniser.h" #include "dyn_arr.h" +#include "logger.h" #include "token.h" DEFINE_DA_IMPL(token, token_t) #define TZ_TOK_PUSH(_loc, _type, _text) mcl_da_token_push(&tz->tokens, (token_t){.loc = (_loc), .type = (_type), .text=(_text)}) +static struct { + char* k; + token_type_t v; +} KEYWORDS[] = { + {"fn", TT_KW_FN }, + {"return", TT_KW_RETURN}, + {"for", TT_KW_FOR}, + {"if", TT_KW_IF}, + {"else", TT_KW_ELSE}, + {"enum", TT_KW_ENUM}, + {"struct", TT_KW_STRUCT}, + {"while", TT_KW_WHILE}, + {"break", TT_KW_BREAK}, + {"continue", TT_KW_CONTINUE} +}; + tokeniser_t* tokenise(char* file) { tokeniser_t* tz = malloc(sizeof(tokeniser_t)); tz->loc.file = file; + tz->loc.line = 1; + tz->loc.col = 1; FILE* f = fopen(file, "r"); if (!f) { printf("Could not open file %s\n", file); @@ -21,30 +40,33 @@ tokeniser_t* tokenise(char* file) { char c; while ((c = fgetc(f)) != EOF) { - tz->loc.col++; switch(c) { - case ' ': case '\t': - case '\r': break; + case '\r': + case ' ':{ + tz->loc.col += 1; + } break; case '\n': { - tz->loc.col = 0; + tz->loc.col = 1; tz->loc.line++; } break; case '"': { + loc_t loc = tz->loc; int size = 256; int i = 0; char* buf = malloc(size * sizeof(char)); while ((c = fgetc(f)) != EOF) { + tz->loc.col++; if (size <= strlen(buf) - 1) { buf = realloc(buf, size *= 2); } if (c == '\n') { - printf("ERROR: Newline in string\n"); + mcl_log_loc(ERROR, &tz->loc, "No newlines in strings"); return NULL; } - tz->loc.col++; + if (c == '"') break; if (c == '\\') { switch (c = fgetc(f)) { @@ -55,25 +77,36 @@ tokeniser_t* tokenise(char* file) { buf[i++] = c; } - TZ_TOK_PUSH(tz->loc, TT_STR, buf); + TZ_TOK_PUSH(loc, TT_STR, buf); } break; case '\'': { + loc_t loc = tz->loc; bool escape = false; char c = fgetc(f); + tz->loc.col += 1; char* buf = malloc(1 * sizeof(char)); if (c == '\\') { c = fgetc(f); + tz->loc.col += 1; switch (c) { case 'n': *buf = '\n'; break; default: - printf("ERROR: Unknown escape: \\%c\n", c); + mcl_log_loc(ERROR, &tz->loc, "Unknown escape: \\%c\n", c); return NULL; } + } else { + tz->loc.col += 1; *buf = c; } - TZ_TOK_PUSH(tz->loc, TT_CHR, buf); + + c = fgetc(f); + if (c != '\'') { + mcl_log_loc(ERROR, &tz->loc, "Expected \"'\" found \"%c\"", c); + return NULL; + } + TZ_TOK_PUSH(loc, TT_CHR, buf); } @@ -104,6 +137,7 @@ tokeniser_t* tokenise(char* file) { case 'y': case 'Y': case 'z': case 'Z': case '_': { + loc_t loc = tz->loc; int size = 256; int i = 1; char* buf = malloc(size * sizeof(char)); @@ -114,6 +148,7 @@ tokeniser_t* tokenise(char* file) { (c >= '0' && c <= '9') || (c == '_') )) { + tz->loc.col++; ungetc(c, f); break; }; @@ -124,31 +159,97 @@ tokeniser_t* tokenise(char* file) { tz->loc.col++; buf[i++] = c; } - TZ_TOK_PUSH(tz->loc, TT_IDENT, buf); + bool found = false; + for (int i = 0; i < sizeof(KEYWORDS)/sizeof(KEYWORDS[0]); i++) { + if (strcmp(buf, KEYWORDS[i].k) == 0) { + TZ_TOK_PUSH(loc, KEYWORDS[i].v, NULL); + found = true; + } + } + + if (!found) TZ_TOK_PUSH(loc, TT_IDENT, buf); } break; - case '}': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; - case '{': TZ_TOK_PUSH(tz->loc, TT_CURLY_L, NULL); break; - case ']': TZ_TOK_PUSH(tz->loc, TT_BRACK_R, NULL); break; - case '[': TZ_TOK_PUSH(tz->loc, TT_BRACK_L, NULL); break; - case ')': TZ_TOK_PUSH(tz->loc, TT_PAREN_R, NULL); break; - case '(': TZ_TOK_PUSH(tz->loc, TT_PAREN_L, NULL); break; - case ':': TZ_TOK_PUSH(tz->loc, TT_COLON, NULL); break; - case ';': TZ_TOK_PUSH(tz->loc, TT_SEMI, NULL); break; - case ',': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; - case '.': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; - case '&': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; - case '*': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; - case '+': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; - case '-': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; - case '/': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; - case '|': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; - case '=': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; - case '<': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; - case '>': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break; + case '}': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; + case '{': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_L, NULL); + tz->loc.col++; + } break; + case ']': { + TZ_TOK_PUSH(tz->loc, TT_BRACK_R, NULL); + tz->loc.col++; + } break; + case '[': { + TZ_TOK_PUSH(tz->loc, TT_BRACK_L, NULL); + tz->loc.col++; + } break; + case ')': { + TZ_TOK_PUSH(tz->loc, TT_PAREN_R, NULL); + tz->loc.col++; + } break; + case '(': { + TZ_TOK_PUSH(tz->loc, TT_PAREN_L, NULL); + tz->loc.col++; + } break; + case ':': { + TZ_TOK_PUSH(tz->loc, TT_COLON, NULL); + tz->loc.col++; + } break; + case ';': { + TZ_TOK_PUSH(tz->loc, TT_SEMI, NULL); + tz->loc.col++; + } break; + case ',': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; + case '.': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; + case '&': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; + case '*': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; + case '+': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; + case '-': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; + case '/': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; + case '|': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; + case '=': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; + case '<': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; + case '>': { + TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); + tz->loc.col++; + } break; } } return tz; } + diff --git a/test.mcl b/test.mcl index 25593cb..1e25d2f 100644 --- a/test.mcl +++ b/test.mcl @@ -1,6 +1,6 @@ -main :: fn(argc: i32, argv: string[]) -> i32 { +main :: fn(argc: i32, argv: string -> i32 { println!("Hello world!\n"); }