diff --git a/.clang-format b/.clang-format index 9ca6f4b..94e2a53 100644 --- a/.clang-format +++ b/.clang-format @@ -123,7 +123,7 @@ IncludeIsMainSourceRegex: '' IndentAccessModifiers: false IndentCaseBlocks: false IndentCaseLabels: false -IndentExternBlock: AfterExternBlock +IndentExternBlock: Indent IndentGotoLabels: true IndentPPDirectives: None IndentRequiresClause: true diff --git a/src/dynarray.c b/src/dynarray.c index 6e41fff..2b2d291 100644 --- a/src/dynarray.c +++ b/src/dynarray.c @@ -1,5 +1,7 @@ #include "dynarray.h" +#include +#include /* @@ -19,57 +21,74 @@ To set the ith element of the array, use either bracket notation // Returns a pointer to the start of a new dynarray (after the header) which // has `init_cap` units of `stride` bytes. -void *_dynarray_create(size_t init_cap, size_t stride) { - size_t header_size = DYNARRAY_FIELDS * sizeof(size_t); - size_t arr_size = init_cap * stride; - size_t *arr = (size_t *)malloc(header_size + arr_size); - arr[CAPACITY] = init_cap; - arr[LENGTH] = 0; - arr[STRIDE] = stride; - return (void *)(arr + DYNARRAY_FIELDS); +void* _dynarray_create(size_t init_cap, size_t stride) { + size_t header_size = DYNARRAY_FIELDS * sizeof(size_t); + size_t arr_size = init_cap * stride; + size_t* arr = (size_t*)malloc(header_size + arr_size); + arr[CAPACITY] = init_cap; + arr[LENGTH] = 0; + arr[STRIDE] = stride; + return (void*)(arr + DYNARRAY_FIELDS); } -void _dynarray_destroy(void *arr) { - free(arr - DYNARRAY_FIELDS * sizeof(size_t)); +void _dynarray_destroy(void* arr) { + free(arr - DYNARRAY_FIELDS * sizeof(size_t)); } // Returns the dynarray's field which is specified by passing // one of CAPACITY, LENGTH, STRIDE. -size_t _dynarray_field_get(void *arr, size_t field) { - return ((size_t *)(arr)-DYNARRAY_FIELDS)[field]; +size_t _dynarray_field_get(void* arr, size_t field) { + return ((size_t*)(arr)-DYNARRAY_FIELDS)[field]; } -void _dynarray_field_set(void *arr, size_t field, size_t value) { - ((size_t *)(arr)-DYNARRAY_FIELDS)[field] = value; +void _dynarray_field_set(void* arr, size_t field, size_t value) { + ((size_t*)(arr)-DYNARRAY_FIELDS)[field] = value; } // Allocates a new dynarray with twice the size of the one passed in, and // retaining the values that the original stored. -void *_dynarray_resize(void *arr) { - void *temp = _dynarray_create( // Allocate new dynarray w/ more space. - DYNARRAY_RESIZE_FACTOR * dynarray_capacity(arr), dynarray_stride(arr)); - memcpy(temp, arr, - dynarray_length(arr) * dynarray_stride(arr)); // Copy erythin' over. - _dynarray_field_set(temp, LENGTH, - dynarray_length(arr)); // Set `length` field. - _dynarray_destroy(arr); // Free previous array. - return temp; +void* _dynarray_resize(void* arr) { + void* temp = _dynarray_create( // Allocate new dynarray w/ more space. + DYNARRAY_RESIZE_FACTOR * dynarray_capacity(arr), dynarray_stride(arr)); + memcpy(temp, arr, + dynarray_length(arr) * dynarray_stride(arr)); // Copy erythin' over. + _dynarray_field_set(temp, LENGTH, + dynarray_length(arr)); // Set `length` field. + _dynarray_destroy(arr); // Free previous array. + return temp; } -void *_dynarray_push(void *arr, void *xptr) { - if (dynarray_length(arr) >= dynarray_capacity(arr)) - arr = _dynarray_resize(arr); +void* _dynarray_push(void* arr, void* xptr) { + if (dynarray_length(arr) >= dynarray_capacity(arr)) { + arr = _dynarray_resize(arr); + } - memcpy(arr + dynarray_length(arr) * dynarray_stride(arr), xptr, - dynarray_stride(arr)); - _dynarray_field_set(arr, LENGTH, dynarray_length(arr) + 1); - return arr; + memcpy(arr + dynarray_length(arr) * dynarray_stride(arr), xptr, dynarray_stride(arr)); + _dynarray_field_set(arr, LENGTH, dynarray_length(arr) + 1); + return arr; } // Removes the last element in the array, but copies it to `*dest` first. -void _dynarray_pop(void *arr, void *dest) { - memcpy(dest, arr + (dynarray_length(arr) - 1) * dynarray_stride(arr), - dynarray_stride(arr)); - _dynarray_field_set(arr, LENGTH, - dynarray_length(arr) - 1); // Decrement length. +int _dynarray_pop(void* arr, void* dest) { + if (dynarray_length(arr) < 1) { + return 1; + } + memcpy(dest, arr + (dynarray_length(arr) - 1) * dynarray_stride(arr), dynarray_stride(arr)); + _dynarray_field_set(arr, LENGTH, + dynarray_length(arr) - 1); // Decrement length. + return 0; +} + +// copies everything in reverse into a temporary array, and then memcpy's everything into the old array; +void _dynarray_reverse(void* arr) { + size_t stride = dynarray_stride(arr); + size_t length = dynarray_length(arr); + void* temp_arr = _dynarray_create(length, stride); + + void* temp_item = malloc(stride); + for (int i = dynarray_length(arr); i > 0; i--) { + dynarray_pop(arr, temp_item); + memcpy(arr + i * stride, temp_item, stride); + } + memcpy(arr, temp_arr, stride * length); } diff --git a/src/include/ast.h b/src/include/ast.h new file mode 100644 index 0000000..d375d10 --- /dev/null +++ b/src/include/ast.h @@ -0,0 +1,69 @@ +#ifndef _H_MORPH_AST +#define _H_MORPH_AST + +#include "loc.h" +#include "token.h" +#include + +typedef struct const_s { + const char* name; + + union { + const char* str_v; + size_t int_v; + }; +} const_t; + +typedef struct memory_s { + const char* name; + size_t size; +} memory_t; + +typedef struct ast_if_stat_s { + token_t* condition; + token_t* body; + bool is_elseif; + + union { + struct ast_if_stat_s* elseif; + token_t else_body; + }; +} ast_if_stat_t; + +typedef struct ast_while_stat_s { + token_t* condition; + token_t* body; +} ast_while_stat_t; + +typedef enum ast_op_type_e { + AOT_OP, + AOT_IF, + AOT_WHILE, +} ast_op_type_t; + +typedef struct ast_op_s { + ast_op_type_t type; + loc_t loc; + + union { + token_t op; + ast_if_stat_t if_stat; + ast_while_stat_t while_stat; + }; +} ast_op_t; + +typedef struct function_s { + loc_t loc; + const char* name; + const char** args; + const char** return_args; + ast_op_type_t* body; +} function_t; + +typedef struct program_s { + const_t const_vars; + function_t funcs; + memory_t memories; +} program_t; + +#endif // !_H_MORPH_AST diff --git a/src/include/dynarray.h b/src/include/dynarray.h index 311dc10..6f4944d 100644 --- a/src/include/dynarray.h +++ b/src/include/dynarray.h @@ -24,7 +24,9 @@ void _dynarray_field_set(void* arr, size_t field, size_t value); void* _dynarray_resize(void* arr); void* _dynarray_push(void* arr, void* xptr); -void _dynarray_pop(void* arr, void* dest); +// OK = 0; ERR = 1 +int _dynarray_pop(void* arr, void* dest); +void _dynarray_reverse(void* arr); #define DYNARRAY_DEFAULT_CAP 1 #define DYNARRAY_RESIZE_FACTOR 2 @@ -41,6 +43,7 @@ void _dynarray_pop(void* arr, void* dest); } while (0) #define dynarray_pop(arr, xptr) _dynarray_pop(arr, xptr) +#define dynarray_reverse(arr) _dynarray_reverse(arr) #define dynarray_capacity(arr) _dynarray_field_get(arr, CAPACITY) #define dynarray_length(arr) _dynarray_field_get(arr, LENGTH) diff --git a/src/include/logger.h b/src/include/logger.h new file mode 100644 index 0000000..2bea69a --- /dev/null +++ b/src/include/logger.h @@ -0,0 +1,20 @@ +#ifndef _H_MORPH_LOG +#define _H_MORPH_LOG + +#include + +typedef enum log_level_e { + ERROR, + WARN, + INFO, + DEBUG, +} log_level_t; + +void _log(loc_t* loc, log_level_t level, const char* fmt, ...); + +#define log_error(loc, ...) _log(loc, ERROR, __VA_ARGS__) +#define log_warn(loc, ...) _log(loc, WARN, __VA_ARGS__) +#define log_info(loc, ...) _log(loc, INFO, __VA_ARGS__) +#define log_debug(loc, ...) _log(loc, DEBUG, __VA_ARGS__) + +#endif // !_H_MORPH_LOG diff --git a/src/include/parser.h b/src/include/parser.h index e69de29..f759046 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -0,0 +1,8 @@ +#ifndef _H_MORPH_PARSER +#define _H_MORPH_PARSER + +#include "ast.h" +#include "token.h" +program_t parse(const token_t* tokens); + +#endif // _H_MORPH_PARSER diff --git a/src/include/token.h b/src/include/token.h index 8e1c856..8958a6d 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -33,6 +33,7 @@ typedef enum kw_type_e { KW_WHILE, KW_CONST, KW_MEMORY, + KW_INCLUDE, KW_COUNT__, } kw_type_t; @@ -119,4 +120,6 @@ typedef struct token_s { extern const char* OP_LIST[]; extern const char* KW_LIST[]; +const char* get_tok_str_dbg(token_t* tok); +const char* get_tok_type_str_dbg(token_type_t tok); #endif // _H_MORPH_TOKEN diff --git a/src/logger.c b/src/logger.c new file mode 100644 index 0000000..0df7a81 --- /dev/null +++ b/src/logger.c @@ -0,0 +1,27 @@ +#include "loc.h" +#include +#include +#include + +#define ERROR_PFX "\x1b[1;31merror\x1b[0m" +#define WARN_PFX "\x1b[1;33mwarn\x1b[0m" +#define INFO_PFX "\x1b[1;32minfo\x1b[0m" +#define DEBUG_PFX "\x1b[1;34mdebug\x1b[0m" + +void _log(loc_t* loc, log_level_t level, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + if (loc) { + printf("%s:%d:%d ", loc->file, loc->line, loc->col); + } + switch (level) { // clang-format off + case (ERROR): printf(ERROR_PFX); break; + case (WARN): printf(WARN_PFX); break; + case (INFO): printf(INFO_PFX); break; + case (DEBUG): printf(DEBUG_PFX); break; + } // clang-format on + printf(": "); + vprintf(fmt, args); + printf("\n"); + va_end(args); +} diff --git a/src/parser.c b/src/parser.c index e69de29..b8d505d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -0,0 +1,131 @@ +#include "logger.h" +#include "tokeniser.h" +#include "util.h" +#include +#include +#include +#include +#include +#include + +typedef struct parser_state_s { + program_t prog; + token_t curr_tok; + token_t* tokens; +} parser_state_t; + +token_t* expect_token_type(parser_state_t* state, token_type_t type) { + const char* exoected_type_str = get_tok_type_str_dbg(state->curr_tok.type); + if (dynarray_pop(state->tokens, &state->curr_tok) != 0) { + log_error(&state->curr_tok.loc, "Invalid word, expected %s, got nothing.", exoected_type_str); + return NULL; + } + if (state->curr_tok.type != type) { + const char* s1 = get_tok_str_dbg(&state->curr_tok); + log_error(&state->curr_tok.loc, "Invalid word, expected %s, got %s.", exoected_type_str, s1); + return NULL; + } + return &state->curr_tok; +} + +token_t* expect_token_type_ex(parser_state_t* state, token_type_t type, int inner_type) { + const char* exoected_type_str = get_tok_type_str_dbg(state->curr_tok.type); + if (dynarray_pop(state->tokens, &state->curr_tok) != 0) { + log_error(&state->curr_tok.loc, "Invalid word, expected %s, got nothing.", exoected_type_str); + return NULL; + } + // NOTE: all inner types are in a union + if (state->curr_tok.type != type || state->curr_tok.kw_type != inner_type) { + const char* s1 = get_tok_str_dbg(&state->curr_tok); + log_error(&state->curr_tok.loc, "Invalid word, expected %s, got %s.", exoected_type_str, s1); + return NULL; + } + return &state->curr_tok; +} + +// Tests token type, and inner type, so like, op type, kw type, etc. +// Does not edit state.curr_tok +token_t* test_token_type_ex(parser_state_t* state, token_type_t tok_type, int gen_type) { + const char* exoected_type_str = get_tok_type_str_dbg(state->curr_tok.type); + token_t* token = &state->tokens[dynarray_length(state->tokens - 1)]; + // NOTE: tests inner type in a generic way + if (token->type != tok_type || ((int)token->kw_type) != gen_type) { + return NULL; + } + return token; +} + +token_t* test_token_type(parser_state_t* state, token_type_t tok_type) { + const char* exoected_type_str = get_tok_type_str_dbg(state->curr_tok.type); + token_t* token = &state->tokens[dynarray_length(state->tokens - 1)]; + if (token->type != tok_type) { + return NULL; + } + return token; +} + +int _parse(parser_state_t* state); + +program_t parse(const token_t* tokens) { + // reverses tokens so its way easier to parse; + dynarray_reverse((void*)tokens); + parser_state_t state = {0}; + state.tokens = (token_t*)tokens; + (void)tokens; + _parse(&state); + return state.prog; +} + +int _parse(parser_state_t* state) { + while (dynarray_pop(state->tokens, &state->curr_tok) == 0) { + if (state->curr_tok.type != TT_KW) { + const char* dbg_s = get_tok_str_dbg(&state->curr_tok); + log_error(&state->curr_tok.loc, "Invalid word, expected Keyword, got %s.", dbg_s); + free((void*)dbg_s); + } + switch (state->curr_tok.kw_type) { + case (KW_INCLUDE): { + expect_token_type(state, TT_PUSH_STR); + const char* code = read_to_string(state->curr_tok.str_v); + if (!code) { + return 1; + } + token_t* tokens = tokenise_string((char*)state->curr_tok.str_v, (char*)code); + if (!tokens) { + return 1; + } + if (_parse(state) != 0) { + return 1; + } + }; break; + case (KW_CONST): { + // TODO: Implement compile time calculation of the value + token_t* tokens = dynarray_create(token_t); + + while (test_token_type_ex(state, TT_KW, KW_END) == NULL) { + // clang-format off + if ( + test_token_type(state, TT_PUSH_STR) || test_token_type(state, TT_PUSH_CSTR) || + test_token_type(state, TT_PUSH_CHAR) || test_token_type(state, TT_PUSH_INT) || + test_token_type(state, TT_PUSH_FLOAT) || test_token_type(state, TT_PUSH_MEM) + ) { // clang-format on + dynarray_pop(state->tokens, &state->curr_tok); + dynarray_push(tokens, state->curr_tok); + } + } + if (!expect_token_type_ex(state, TT_KW, KW_END)) { + return 1; + } + + }; break; + case (KW_MEMORY): { + + }; break; + case (KW_FN): { + + }; break; + default: + assert(true && "TODO: parse all kw's"); + } + } +} diff --git a/src/token.c b/src/token.c index 4f38069..0de002c 100644 --- a/src/token.c +++ b/src/token.c @@ -1,3 +1,7 @@ +#include +#include +#include +#include #include // clang-format off @@ -59,7 +63,65 @@ const char* KW_LIST[] = { [KW_ELSE] = "else", [KW_WHILE] = "while", [KW_CONST] = "const", - [KW_MEMORY] = "memory" + [KW_MEMORY] = "memory", + [KW_INCLUDE] = "include", }; +const char* get_tok_type_str_dbg(token_type_t tok_t) { + switch (tok_t) { + case (TT_KW): return "Keyword"; + case (TT_OP): return "Operator"; + case (TT_IDENT): return "Identifier"; + case (TT_PUSH_CHAR): return "Char Literal"; + case (TT_PUSH_INT): return "Intager"; + case (TT_PUSH_STR): return "String"; + case (TT_PUSH_CSTR): return "CString"; + case (TT_PUSH_MEM): return "Memory address"; + case (TT_PUSH_FLOAT): return "Float"; + case (TT_NONE): assert(true && "Invalid"); + } + return "Unreachable"; +} +const char* get_tok_str_dbg(token_t* tok) { + int buf_size = 1024*4; + char* buf = (char*)malloc(buf_size); + switch (tok->type) { + case (TT_KW): { + snprintf(buf, buf_size, "Keyword '%s'", KW_LIST[tok->kw_type]); + }; break; + case (TT_OP): { + snprintf(buf, buf_size, "Operator '%s'", OP_LIST[tok->op_type]); + }; break; + case (TT_IDENT): { + snprintf(buf, buf_size, "Identifier '%s'", tok->str_v); + }; break; + case (TT_PUSH_CHAR): { + snprintf(buf, buf_size, "'%c'", tok->char_v); + }; break; + case (TT_PUSH_INT): { + snprintf(buf, buf_size, "%zu", tok->int_v); + }; break; + case (TT_PUSH_STR): { + while (strlen(tok->str_v) > buf_size-2) { + buf = realloc(buf, buf_size*=2); + } + snprintf(buf, buf_size, "\"%s\"", tok->str_v); + }; break; + case (TT_PUSH_CSTR): { + while (strlen(tok->str_v) > buf_size-3) { + buf = realloc(buf, buf_size*=2); + } + snprintf(buf, buf_size, "c\"%s\"", tok->str_v); + }; break; + case (TT_PUSH_MEM): { + snprintf(buf, buf_size, "Memory address label '%s'", tok->str_v); + }; break; + case (TT_PUSH_FLOAT): { + snprintf(buf, buf_size, "%f", tok->float_v); + }; break; + case (TT_NONE): assert(true && "Invalid"); + } + return buf; +} + // clang-format on diff --git a/src/tokeniser.c b/src/tokeniser.c index b310986..cba0941 100644 --- a/src/tokeniser.c +++ b/src/tokeniser.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -78,9 +79,65 @@ token_t* tokenise_string(char* file_path, char* str) { loc.line += 1; continue; } + case ('\''): { + char* buf = malloc(32); + bool escaped = false; + for (int y = 0;;) { + if (str[i] == '\'' && !escaped) { + break; + } + if (str[i] == '\\' && !escaped) { + escaped = true; + continue; + } + if (escaped) { + switch (str[i]) { + case ('r'): + buf[0] = '\r'; + break; + case ('\''): + buf[0] = '\''; + break; + case ('n'): + buf[0] = '\n'; + break; + case ('t'): + buf[0] = '\t'; + break; + case ('\\'): + buf[0] = '\\'; + break; + } + y = 1; + } + if (str[i] == '\n') { + printf("%s:%d:%d: Error: Multi line chars not supported\n", loc.file, loc.line, loc.col); + return NULL; + } + if (y > 0) { + printf("%s:%d:%d: Error: Character literals must only have 1 char inside of them\n", loc.file, loc.line, loc.col); + return NULL; + } + loc.col += 1; + buf[y++] = str[i++]; + } + + token_t tok = { + .type = TT_PUSH_CHAR, + .str_v = buf, + }; + dynarray_push(tokens, tok); + } break; case ('c'): case ('"'): { + bool is_cstr = false; if (str[i] == 'c') { + if (str[i + 1] != '"') { + goto tokenise_ident; + } + i += 1; + loc.col += 1; + is_cstr = true; } char* buf = malloc(1024 * 4); bool escaped = false; @@ -95,16 +152,22 @@ token_t* tokenise_string(char* file_path, char* str) { if (str[i] == '\\' && escaped) { escaped = false; } + if (str[i] == '\n') { + printf("%s:%d:%d: Error: Multi line strings not supported\n", loc.file, loc.line, loc.col); + return NULL; + } + loc.col += 1; buf[y++] = str[i++]; } token_t tok = { - .type = TT_PUSH_STR, + .type = is_cstr ? TT_PUSH_CSTR : TT_PUSH_STR, .str_v = buf, }; dynarray_push(tokens, tok); } break; default: { + tokenise_ident: // TODO: manage memory better // INFO: If you make an ident longer than 4kb i will murder you char* buf = malloc(1024 * 4); @@ -115,6 +178,7 @@ token_t* tokenise_string(char* file_path, char* str) { break; } buf[buf_counter++] = str[i]; + loc.col += 1; i += 1; } buf[buf_counter + 1] = '\0';