morph/src/tokeniser.c
2025-06-25 22:09:56 +03:00

87 lines
2.4 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <token.h>
#include <tokeniser.h>
#include <dynarray.h>
/* Returns true when `c` is one of the whitespace characters that separate words. */
static bool is_token_delim(char c) {
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

/*
 * Splits `str` into whitespace-separated words and turns each word into a token.
 *
 * Each word is compared against KW_LIST first, then OP_LIST; a match yields a
 * TT_KW / TT_OP token carrying the matching table index. Anything else becomes
 * a TT_IDENT token whose `str_v` is a heap-allocated, NUL-terminated copy of
 * the word (the token owns that string).
 *
 * Parameters:
 *   file_path - stored in the running source location (`loc.file`).
 *   str       - NUL-terminated input text; it is only read.
 *
 * Returns the dynarray of tokens created with dynarray_create().
 * Aborts the process if a word buffer cannot be allocated.
 *
 * NOTE(review): `loc` is maintained but never attached to the emitted tokens;
 * presumably token_t is meant to carry it eventually - confirm against token.h.
 */
token_t* tokenise_string(char* file_path, char* str) {
    const size_t str_len = strlen(str);
    loc_t loc = {0};
    loc.file = file_path;
    token_t* tokens = dynarray_create(token_t);

    size_t i = 0;
    while (i < str_len) {
        const char c = str[i];

        if (c == '\n') {
            loc.col = 0;
            loc.line += 1;
            i += 1;
            continue;
        }
        if (is_token_delim(c)) {
            loc.col += 1;
            i += 1;
            continue;
        }

        // Measure the word first so the buffer is sized exactly; there is no
        // fixed upper bound on word length.
        const size_t start = i;
        while (i < str_len && !is_token_delim(str[i])) {
            i += 1;
        }
        const size_t len = i - start;
        // `i` now rests on the delimiter (or the end of input), so the next
        // iteration handles it and newline accounting stays correct.
        loc.col += len;

        char* word = malloc(len + 1);
        if (word == NULL) {
            fprintf(stderr, "tokenise_string: out of memory\n");
            abort();
        }
        memcpy(word, str + start, len);
        word[len] = '\0';

        // Debug trace of every word seen.
        printf("'%s'\n", word);

        bool found = false;

        // Both tables are scanned from index 1; presumably index 0 is a
        // placeholder/"none" entry - verify against the table definitions.
        for (int kw = 1; kw < KW_COUNT__ && !found; kw++) {
            if (strcmp(KW_LIST[kw], word) == 0) {
                token_t tok = {
                    .type = TT_KW,
                    .kw_type = (kw_type_t)kw,
                };
                dynarray_push(tokens, tok);
                found = true;
            }
        }

        for (int op = 1; op < OP_COUNT__ && !found; op++) {
            if (strcmp(OP_LIST[op], word) == 0) {
                token_t tok = {
                    .type = TT_OP,
                    .op_type = (op_type_t)op,
                };
                dynarray_push(tokens, tok);
                found = true;
            }
        }

        if (found) {
            // Keyword/operator tokens do not keep the text, so release it here.
            free(word);
            continue;
        }

        // Ownership of `word` moves to the identifier token.
        token_t tok = {
            .type = TT_IDENT,
            .str_v = word,
        };
        dynarray_push(tokens, tok);
    }

    return tokens;
}