This commit is contained in:
Gvidas Juknevičius 2025-06-30 19:19:49 +03:00
parent a9b8c9dd5a
commit cef7b2bc82
Signed by: MCorange
GPG Key ID: 5BE6B533CB76FE86
11 changed files with 445 additions and 39 deletions

View File

@ -123,7 +123,7 @@ IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false IndentAccessModifiers: false
IndentCaseBlocks: false IndentCaseBlocks: false
IndentCaseLabels: false IndentCaseLabels: false
IndentExternBlock: AfterExternBlock IndentExternBlock: Indent
IndentGotoLabels: true IndentGotoLabels: true
IndentPPDirectives: None IndentPPDirectives: None
IndentRequiresClause: true IndentRequiresClause: true

View File

@ -1,5 +1,7 @@
#include "dynarray.h" #include "dynarray.h"
#include <stdlib.h>
#include <string.h>
/* /*
@ -19,34 +21,34 @@ To set the ith element of the array, use either bracket notation
// Returns a pointer to the start of a new dynarray (after the header) which
// has `init_cap` units of `stride` bytes.
// Returns NULL if the requested size overflows size_t or the allocation fails.
void* _dynarray_create(size_t init_cap, size_t stride) {
    size_t header_size = DYNARRAY_FIELDS * sizeof(size_t);
    // Refuse sizes where header_size + init_cap * stride would wrap around.
    if (stride != 0 && init_cap > ((size_t)-1 - header_size) / stride) {
        return NULL;
    }
    size_t* arr = malloc(header_size + init_cap * stride);
    if (arr == NULL) {
        return NULL;
    }
    arr[CAPACITY] = init_cap;
    arr[LENGTH] = 0;
    arr[STRIDE] = stride;
    // Callers only ever see the element storage; the header sits just before it.
    return (void*)(arr + DYNARRAY_FIELDS);
}
// Frees a dynarray previously returned by _dynarray_create.
// `arr` is the element pointer handed to callers; the allocation actually
// starts DYNARRAY_FIELDS size_t slots earlier. Passing NULL is a no-op.
void _dynarray_destroy(void* arr) {
    if (arr == NULL) {
        return;
    }
    // Step back over the header with typed pointer arithmetic rather than
    // arithmetic on void*, which is a compiler extension.
    free((size_t*)arr - DYNARRAY_FIELDS);
}
// Reads one header field of the dynarray. `field` is one of
// CAPACITY, LENGTH, STRIDE.
size_t _dynarray_field_get(void* arr, size_t field) {
    // The header occupies the DYNARRAY_FIELDS slots right before the elements.
    size_t* header = (size_t*)arr - DYNARRAY_FIELDS;
    return header[field];
}
// Overwrites one header field (CAPACITY, LENGTH or STRIDE) of the dynarray
// with `value`.
void _dynarray_field_set(void* arr, size_t field, size_t value) {
    size_t* header = (size_t*)arr - DYNARRAY_FIELDS;
    header[field] = value;
}
// Allocates a new dynarray with twice the size of the one passed in, and // Allocates a new dynarray with twice the size of the one passed in, and
// retaining the values that the original stored. // retaining the values that the original stored.
void *_dynarray_resize(void *arr) { void* _dynarray_resize(void* arr) {
void *temp = _dynarray_create( // Allocate new dynarray w/ more space. void* temp = _dynarray_create( // Allocate new dynarray w/ more space.
DYNARRAY_RESIZE_FACTOR * dynarray_capacity(arr), dynarray_stride(arr)); DYNARRAY_RESIZE_FACTOR * dynarray_capacity(arr), dynarray_stride(arr));
memcpy(temp, arr, memcpy(temp, arr,
dynarray_length(arr) * dynarray_stride(arr)); // Copy erythin' over. dynarray_length(arr) * dynarray_stride(arr)); // Copy erythin' over.
@ -56,20 +58,37 @@ void *_dynarray_resize(void *arr) {
return temp; return temp;
} }
// Appends the `stride`-byte element at `xptr` to the end of the dynarray,
// growing it first when length has reached capacity.
// Returns the (possibly relocated) array pointer; callers must use the
// returned pointer from then on.
void* _dynarray_push(void* arr, void* xptr) {
    if (dynarray_length(arr) >= dynarray_capacity(arr)) {
        arr = _dynarray_resize(arr);
    }

    size_t stride = dynarray_stride(arr);
    size_t length = dynarray_length(arr);
    // Byte offsets are computed on char* so the code does not depend on the
    // void*-arithmetic extension.
    memcpy((char*)arr + length * stride, xptr, stride);
    _dynarray_field_set(arr, LENGTH, length + 1);
    return arr;
}
// Removes the last element in the array, but copies it to `*dest` first.
// Returns 0 on success, or 1 when the array is empty (in which case `*dest`
// and the array are left untouched).
int _dynarray_pop(void* arr, void* dest) {
    size_t length = dynarray_length(arr);
    if (length < 1) {
        return 1;
    }

    size_t stride = dynarray_stride(arr);
    // Offset computed on char* to avoid arithmetic on void*.
    memcpy(dest, (char*)arr + (length - 1) * stride, stride);
    _dynarray_field_set(arr, LENGTH, length - 1); // Decrement length.
    return 0;
}
// Reverses the order of the elements of the dynarray in place.
// Length, capacity and stride are unchanged and no memory is allocated, so
// the operation cannot fail.
void _dynarray_reverse(void* arr) {
    size_t stride = dynarray_stride(arr);
    size_t length = dynarray_length(arr);
    if (length < 2) {
        return; // Nothing to swap.
    }

    unsigned char* lo = (unsigned char*)arr;
    unsigned char* hi = lo + (length - 1) * stride;
    // Walk inwards from both ends, swapping whole elements byte by byte.
    for (; lo < hi; lo += stride, hi -= stride) {
        for (size_t b = 0; b < stride; b++) {
            unsigned char tmp = lo[b];
            lo[b] = hi[b];
            hi[b] = tmp;
        }
    }
}

69
src/include/ast.h Normal file
View File

@ -0,0 +1,69 @@
#ifndef _H_MORPH_AST
#define _H_MORPH_AST
#include "loc.h"
#include "token.h"
#include <stddef.h>
// A named constant whose value is stored either as a string or as an integer.
// NOTE(review): nothing in this struct records which union member is in use;
// confirm how consumers are meant to tell the two apart.
typedef struct const_s {
const char* name;
union {
const char* str_v; // value when the constant holds a string
size_t int_v; // value when the constant holds an integer
};
} const_t;
// A named memory region declaration.
typedef struct memory_s {
const char* name;
size_t size; // presumably the region size in bytes - TODO confirm the unit
} memory_t;
// AST node for an `if` statement: a condition, a body and an optional
// continuation (either a chained else-if or an else body).
typedef struct ast_if_stat_s {
token_t* condition;
token_t* body;
// Presumably selects which union member below is valid (true -> elseif,
// false -> else_body) - TODO confirm once the parser fills this in.
bool is_elseif;
union {
struct ast_if_stat_s* elseif;
// NOTE(review): this is a single token_t by value, while `body` above is a
// token_t*; check whether a pointer was intended here.
token_t else_body;
};
} ast_if_stat_t;
// AST node for a `while` loop: the tokens of its condition and of its body.
typedef struct ast_while_stat_s {
token_t* condition;
token_t* body;
} ast_while_stat_t;
// Discriminator for ast_op_t: says which member of its union is populated.
typedef enum ast_op_type_e {
AOT_OP, // plain token (ast_op_t.op)
AOT_IF, // if statement (ast_op_t.if_stat)
AOT_WHILE, // while loop (ast_op_t.while_stat)
} ast_op_type_t;
// One AST operation: a tagged union over a plain token, an if statement and
// a while loop, together with its source location.
typedef struct ast_op_s {
ast_op_type_t type; // tag for the union below
loc_t loc;
union {
token_t op;
ast_if_stat_t if_stat;
ast_while_stat_t while_stat;
};
} ast_op_t;
// A parsed function definition: where it was declared, its name, its
// argument and return lists, and its body.
typedef struct function_s {
loc_t loc;
const char* name;
const char** args;
const char** return_args;
// NOTE(review): this points at ast_op_type_t (the tag enum), not ast_op_t
// (the node); verify that a list of nodes was not intended.
ast_op_type_t* body;
} function_t;
// Result of parsing: the constants, functions and memory declarations of a
// program.
// NOTE(review): each member is a single value, not a pointer or array, so
// this can currently hold only one constant, one function and one memory
// region - confirm whether collections were intended.
typedef struct program_s {
const_t const_vars;
function_t funcs;
memory_t memories;
} program_t;
#endif // !_H_MORPH_AST

View File

@ -24,7 +24,9 @@ void _dynarray_field_set(void* arr, size_t field, size_t value);
void* _dynarray_resize(void* arr); void* _dynarray_resize(void* arr);
void* _dynarray_push(void* arr, void* xptr); void* _dynarray_push(void* arr, void* xptr);
void _dynarray_pop(void* arr, void* dest); // OK = 0; ERR = 1
int _dynarray_pop(void* arr, void* dest);
void _dynarray_reverse(void* arr);
#define DYNARRAY_DEFAULT_CAP 1 #define DYNARRAY_DEFAULT_CAP 1
#define DYNARRAY_RESIZE_FACTOR 2 #define DYNARRAY_RESIZE_FACTOR 2
@ -41,6 +43,7 @@ void _dynarray_pop(void* arr, void* dest);
} while (0) } while (0)
#define dynarray_pop(arr, xptr) _dynarray_pop(arr, xptr) #define dynarray_pop(arr, xptr) _dynarray_pop(arr, xptr)
#define dynarray_reverse(arr) _dynarray_reverse(arr)
#define dynarray_capacity(arr) _dynarray_field_get(arr, CAPACITY) #define dynarray_capacity(arr) _dynarray_field_get(arr, CAPACITY)
#define dynarray_length(arr) _dynarray_field_get(arr, LENGTH) #define dynarray_length(arr) _dynarray_field_get(arr, LENGTH)

20
src/include/logger.h Normal file
View File

@ -0,0 +1,20 @@
#ifndef _H_MORPH_LOG
#define _H_MORPH_LOG
#include <loc.h>
// Severity levels accepted by _log and selected by the log_* macros.
typedef enum log_level_e {
ERROR,
WARN,
INFO,
DEBUG,
} log_level_t;
// Prints one printf-style formatted log line at the given severity.
// `loc` may be NULL, in which case no source location is printed.
void _log(loc_t* loc, log_level_t level, const char* fmt, ...);
// Convenience wrappers fixing the severity. The variadic part must contain
// at least the format string.
#define log_error(loc, ...) _log(loc, ERROR, __VA_ARGS__)
#define log_warn(loc, ...) _log(loc, WARN, __VA_ARGS__)
#define log_info(loc, ...) _log(loc, INFO, __VA_ARGS__)
#define log_debug(loc, ...) _log(loc, DEBUG, __VA_ARGS__)
#endif // !_H_MORPH_LOG

View File

@ -0,0 +1,8 @@
#ifndef _H_MORPH_PARSER
#define _H_MORPH_PARSER
#include "ast.h"
#include "token.h"
// Parses a dynarray of tokens into a program_t.
// NOTE: despite the const qualifier, the implementation reverses the token
// array in place and pops tokens off it while parsing.
program_t parse(const token_t* tokens);
#endif // _H_MORPH_PARSER

View File

@ -33,6 +33,7 @@ typedef enum kw_type_e {
KW_WHILE, KW_WHILE,
KW_CONST, KW_CONST,
KW_MEMORY, KW_MEMORY,
KW_INCLUDE,
KW_COUNT__, KW_COUNT__,
} kw_type_t; } kw_type_t;
@ -119,4 +120,6 @@ typedef struct token_s {
extern const char* OP_LIST[]; extern const char* OP_LIST[];
extern const char* KW_LIST[]; extern const char* KW_LIST[];
const char* get_tok_str_dbg(token_t* tok);
const char* get_tok_type_str_dbg(token_type_t tok);
#endif // _H_MORPH_TOKEN #endif // _H_MORPH_TOKEN

27
src/logger.c Normal file
View File

@ -0,0 +1,27 @@
#include "loc.h"
#include <logger.h>
#include <stdarg.h>
#include <stdio.h>
#define ERROR_PFX "\x1b[1;31merror\x1b[0m"
#define WARN_PFX "\x1b[1;33mwarn\x1b[0m"
#define INFO_PFX "\x1b[1;32minfo\x1b[0m"
#define DEBUG_PFX "\x1b[1;34mdebug\x1b[0m"
// Writes one log line to stdout in the form
//   [file:line:col ]<coloured level>: <formatted message>\n
// The location prefix is omitted when `loc` is NULL. `fmt` and the variadic
// arguments follow printf conventions.
void _log(loc_t* loc, log_level_t level, const char* fmt, ...) {
    // Pick the coloured tag for this level; an unknown level prints no tag.
    const char* tag = "";
    switch (level) {
    case ERROR:
        tag = ERROR_PFX;
        break;
    case WARN:
        tag = WARN_PFX;
        break;
    case INFO:
        tag = INFO_PFX;
        break;
    case DEBUG:
        tag = DEBUG_PFX;
        break;
    }

    if (loc != NULL) {
        printf("%s:%d:%d ", loc->file, loc->line, loc->col);
    }
    printf("%s: ", tag);

    va_list ap;
    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);

    printf("\n");
}

View File

@ -0,0 +1,131 @@
#include "logger.h"
#include "tokeniser.h"
#include "util.h"
#include <assert.h>
#include <stdlib.h>
#include <token.h>
#include <ast.h>
#include <dynarray.h>
#include <parser.h>
typedef struct parser_state_s {
program_t prog;
token_t curr_tok;
token_t* tokens;
} parser_state_t;
// Pops the next token into state->curr_tok and requires it to be of `type`.
// Returns a pointer to state->curr_tok on success. Logs an error and returns
// NULL when no tokens are left or the popped token has a different type.
token_t* expect_token_type(parser_state_t* state, token_type_t type) {
    // Describe the type the caller asked for, not the previously consumed token.
    const char* expected_type_str = get_tok_type_str_dbg(type);
    if (dynarray_pop(state->tokens, &state->curr_tok) != 0) {
        // Nothing was popped, so curr_tok (and its location) is still the
        // previous token.
        log_error(&state->curr_tok.loc, "Invalid word, expected %s, got nothing.", expected_type_str);
        return NULL;
    }

    if (state->curr_tok.type != type) {
        const char* got = get_tok_str_dbg(&state->curr_tok);
        log_error(&state->curr_tok.loc, "Invalid word, expected %s, got %s.", expected_type_str, got);
        free((void*)got); // get_tok_str_dbg hands back a heap buffer we own.
        return NULL;
    }
    return &state->curr_tok;
}
// Like expect_token_type, but additionally requires the token's inner type
// (read through the kw_type member) to equal `inner_type`.
// Returns a pointer to state->curr_tok on success, or NULL after logging an
// error when no tokens are left or either type differs.
token_t* expect_token_type_ex(parser_state_t* state, token_type_t type, int inner_type) {
    // Describe the type the caller asked for, not the previously consumed token.
    const char* expected_type_str = get_tok_type_str_dbg(type);
    if (dynarray_pop(state->tokens, &state->curr_tok) != 0) {
        log_error(&state->curr_tok.loc, "Invalid word, expected %s, got nothing.", expected_type_str);
        return NULL;
    }

    // NOTE: all inner types are in a union
    if (state->curr_tok.type != type || state->curr_tok.kw_type != inner_type) {
        const char* got = get_tok_str_dbg(&state->curr_tok);
        log_error(&state->curr_tok.loc, "Invalid word, expected %s, got %s.", expected_type_str, got);
        free((void*)got); // get_tok_str_dbg hands back a heap buffer we own.
        return NULL;
    }
    return &state->curr_tok;
}
// Tests token type, and inner type, so like, op type, kw type, etc.
// Peeks at the next token to be popped (the last array element) without
// consuming it. Does not edit state.curr_tok.
// Returns the token when both types match, or NULL when they do not or when
// no tokens are left.
token_t* test_token_type_ex(parser_state_t* state, token_type_t tok_type, int gen_type) {
    size_t remaining = dynarray_length(state->tokens);
    if (remaining == 0) {
        return NULL;
    }

    token_t* token = &state->tokens[remaining - 1];
    // NOTE: tests inner type in a generic way
    if (token->type != tok_type || ((int)token->kw_type) != gen_type) {
        return NULL;
    }
    return token;
}
// Peeks at the next token to be popped (the last array element) without
// consuming it and without touching state->curr_tok.
// Returns the token when its type equals `tok_type`, or NULL when it does
// not or when no tokens are left.
token_t* test_token_type(parser_state_t* state, token_type_t tok_type) {
    size_t remaining = dynarray_length(state->tokens);
    if (remaining == 0) {
        return NULL;
    }

    token_t* token = &state->tokens[remaining - 1];
    if (token->type != tok_type) {
        return NULL;
    }
    return token;
}
int _parse(parser_state_t* state);
// Entry point of the parser. Reverses the token dynarray in place so tokens
// can be consumed in source order by popping from the end, runs _parse over
// it and returns whatever program state was built. The status code of _parse
// is not inspected.
program_t parse(const token_t* tokens) {
    token_t* stack = (token_t*)tokens;
    dynarray_reverse((void*)stack);

    parser_state_t state = {0};
    state.tokens = stack;

    _parse(&state);
    return state.prog;
}
// Consumes tokens from state->tokens until none are left, dispatching on
// top-level keywords.
// Returns 0 once every token has been consumed, or 1 as soon as an error has
// been reported.
int _parse(parser_state_t* state) {
    while (dynarray_pop(state->tokens, &state->curr_tok) == 0) {
        if (state->curr_tok.type != TT_KW) {
            const char* dbg_s = get_tok_str_dbg(&state->curr_tok);
            log_error(&state->curr_tok.loc, "Invalid word, expected Keyword, got %s.", dbg_s);
            free((void*)dbg_s);
            // kw_type is meaningless for a non-keyword token, so stop here
            // instead of dispatching on it.
            return 1;
        }

        switch (state->curr_tok.kw_type) {
        case (KW_INCLUDE): {
            // Without a string token there is no path to read.
            if (!expect_token_type(state, TT_PUSH_STR)) {
                return 1;
            }
            const char* code = read_to_string(state->curr_tok.str_v);
            if (!code) {
                return 1;
            }
            token_t* tokens = tokenise_string((char*)state->curr_tok.str_v, (char*)code);
            if (!tokens) {
                return 1;
            }
            // NOTE(review): the included file's `tokens` are never handed to
            // the parser; the call below keeps consuming state->tokens. This
            // looks unfinished - confirm the intended behaviour.
            if (_parse(state) != 0) {
                return 1;
            }
        }; break;
        case (KW_CONST): {
            // TODO: Implement compile time calculation of the value
            // NOTE(review): `tokens` is collected but not stored or released yet.
            token_t* tokens = dynarray_create(token_t);
            while (test_token_type_ex(state, TT_KW, KW_END) == NULL) {
                // clang-format off
                if (
                    test_token_type(state, TT_PUSH_STR)   || test_token_type(state, TT_PUSH_CSTR) ||
                    test_token_type(state, TT_PUSH_CHAR)  || test_token_type(state, TT_PUSH_INT)  ||
                    test_token_type(state, TT_PUSH_FLOAT) || test_token_type(state, TT_PUSH_MEM)
                ) { // clang-format on
                    dynarray_pop(state->tokens, &state->curr_tok);
                    dynarray_push(tokens, state->curr_tok);
                } else {
                    // Nothing was consumed this iteration, so looping again
                    // would spin forever on the same token.
                    log_error(&state->curr_tok.loc, "Invalid word in const body, expected a literal or 'end'.");
                    return 1;
                }
            }
            if (!expect_token_type_ex(state, TT_KW, KW_END)) {
                return 1;
            }
        }; break;
        case (KW_MEMORY): {
        }; break;
        case (KW_FN): {
        }; break;
        default:
            // NOTE(review): this assertion can never fire (its condition is
            // always true), so unhandled keywords are silently skipped.
            assert(true && "TODO: parse all kw's");
        }
    }
    return 0;
}

View File

@ -1,3 +1,7 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <token.h> #include <token.h>
// clang-format off // clang-format off
@ -59,7 +63,65 @@ const char* KW_LIST[] = {
[KW_ELSE] = "else", [KW_ELSE] = "else",
[KW_WHILE] = "while", [KW_WHILE] = "while",
[KW_CONST] = "const", [KW_CONST] = "const",
[KW_MEMORY] = "memory" [KW_MEMORY] = "memory",
[KW_INCLUDE] = "include",
}; };
// Returns a static, human readable name for a token type, for use in
// diagnostics. The returned string must not be freed.
// TT_NONE and any value outside the enum yield "Unreachable".
const char* get_tok_type_str_dbg(token_type_t tok_t) {
    switch (tok_t) {
        case (TT_KW):         return "Keyword";
        case (TT_OP):         return "Operator";
        case (TT_IDENT):      return "Identifier";
        case (TT_PUSH_CHAR):  return "Char Literal";
        case (TT_PUSH_INT):   return "Integer";
        case (TT_PUSH_STR):   return "String";
        case (TT_PUSH_CSTR):  return "CString";
        case (TT_PUSH_MEM):   return "Memory address";
        case (TT_PUSH_FLOAT): return "Float";
        // NOTE(review): this assertion can never fire (its condition is always true).
        case (TT_NONE):       assert(true && "Invalid");
    }
    return "Unreachable";
}
// Builds a human readable description of `tok` for diagnostics.
// Returns a heap-allocated, NUL-terminated string that the caller must
// free(), or NULL if the allocation fails. String and C-string tokens are
// never truncated; other text is capped at the default buffer size.
const char* get_tok_str_dbg(token_t* tok) {
    size_t buf_size = 1024 * 4;
    if (tok->type == TT_PUSH_STR || tok->type == TT_PUSH_CSTR) {
        // Worst case is c"<str>": one prefix char, two quotes and the NUL.
        size_t needed = strlen(tok->str_v) + 4;
        if (needed > buf_size) {
            buf_size = needed;
        }
    }

    char* buf = malloc(buf_size);
    if (buf == NULL) {
        return NULL;
    }
    // TT_NONE writes nothing below; still hand back a valid empty string.
    buf[0] = '\0';

    switch (tok->type) {
        case (TT_KW): {
            snprintf(buf, buf_size, "Keyword '%s'", KW_LIST[tok->kw_type]);
        }; break;
        case (TT_OP): {
            snprintf(buf, buf_size, "Operator '%s'", OP_LIST[tok->op_type]);
        }; break;
        case (TT_IDENT): {
            snprintf(buf, buf_size, "Identifier '%s'", tok->str_v);
        }; break;
        case (TT_PUSH_CHAR): {
            snprintf(buf, buf_size, "'%c'", tok->char_v);
        }; break;
        case (TT_PUSH_INT): {
            snprintf(buf, buf_size, "%zu", tok->int_v);
        }; break;
        case (TT_PUSH_STR): {
            snprintf(buf, buf_size, "\"%s\"", tok->str_v);
        }; break;
        case (TT_PUSH_CSTR): {
            snprintf(buf, buf_size, "c\"%s\"", tok->str_v);
        }; break;
        case (TT_PUSH_MEM): {
            snprintf(buf, buf_size, "Memory address label '%s'", tok->str_v);
        }; break;
        case (TT_PUSH_FLOAT): {
            snprintf(buf, buf_size, "%f", tok->float_v);
        }; break;
        // NOTE(review): this assertion can never fire (its condition is always true).
        case (TT_NONE): assert(true && "Invalid");
    }
    return buf;
}
// clang-format on // clang-format on

View File

@ -1,4 +1,5 @@
#include <limits.h> #include <limits.h>
#include <math.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@ -78,9 +79,65 @@ token_t* tokenise_string(char* file_path, char* str) {
loc.line += 1; loc.line += 1;
continue; continue;
} }
case ('\''): {
char* buf = malloc(32);
bool escaped = false;
for (int y = 0;;) {
if (str[i] == '\'' && !escaped) {
break;
}
if (str[i] == '\\' && !escaped) {
escaped = true;
continue;
}
if (escaped) {
switch (str[i]) {
case ('r'):
buf[0] = '\r';
break;
case ('\''):
buf[0] = '\'';
break;
case ('n'):
buf[0] = '\n';
break;
case ('t'):
buf[0] = '\t';
break;
case ('\\'):
buf[0] = '\\';
break;
}
y = 1;
}
if (str[i] == '\n') {
printf("%s:%d:%d: Error: Multi line chars not supported\n", loc.file, loc.line, loc.col);
return NULL;
}
if (y > 0) {
printf("%s:%d:%d: Error: Character literals must only have 1 char inside of them\n", loc.file, loc.line, loc.col);
return NULL;
}
loc.col += 1;
buf[y++] = str[i++];
}
token_t tok = {
.type = TT_PUSH_CHAR,
.str_v = buf,
};
dynarray_push(tokens, tok);
} break;
case ('c'): case ('c'):
case ('"'): { case ('"'): {
bool is_cstr = false;
if (str[i] == 'c') { if (str[i] == 'c') {
if (str[i + 1] != '"') {
goto tokenise_ident;
}
i += 1;
loc.col += 1;
is_cstr = true;
} }
char* buf = malloc(1024 * 4); char* buf = malloc(1024 * 4);
bool escaped = false; bool escaped = false;
@ -95,16 +152,22 @@ token_t* tokenise_string(char* file_path, char* str) {
if (str[i] == '\\' && escaped) { if (str[i] == '\\' && escaped) {
escaped = false; escaped = false;
} }
if (str[i] == '\n') {
printf("%s:%d:%d: Error: Multi line strings not supported\n", loc.file, loc.line, loc.col);
return NULL;
}
loc.col += 1;
buf[y++] = str[i++]; buf[y++] = str[i++];
} }
token_t tok = { token_t tok = {
.type = TT_PUSH_STR, .type = is_cstr ? TT_PUSH_CSTR : TT_PUSH_STR,
.str_v = buf, .str_v = buf,
}; };
dynarray_push(tokens, tok); dynarray_push(tokens, tok);
} break; } break;
default: { default: {
tokenise_ident:
// TODO: manage memory better // TODO: manage memory better
// INFO: If you make an ident longer than 4kb i will murder you // INFO: If you make an ident longer than 4kb i will murder you
char* buf = malloc(1024 * 4); char* buf = malloc(1024 * 4);
@ -115,6 +178,7 @@ token_t* tokenise_string(char* file_path, char* str) {
break; break;
} }
buf[buf_counter++] = str[i]; buf[buf_counter++] = str[i];
loc.col += 1;
i += 1; i += 1;
} }
buf[buf_counter + 1] = '\0'; buf[buf_counter + 1] = '\0';