#include #include #include #include "tokeniser.h" #include "dyn_arr.h" #include "logger.h" #include "token.h" DEFINE_DA_IMPL(token, token_t) #define TZ_TOK_PUSH(_loc, _type, _text) mcl_da_token_push(&tz->tokens, (token_t){.loc = (_loc), .type = (_type), .text=(_text)}) static struct { char* k; token_type_t v; } KEYWORDS[] = { {"fn", TT_KW_FN }, {"return", TT_KW_RETURN}, {"for", TT_KW_FOR}, {"if", TT_KW_IF}, {"else", TT_KW_ELSE}, {"enum", TT_KW_ENUM}, {"struct", TT_KW_STRUCT}, {"while", TT_KW_WHILE}, {"break", TT_KW_BREAK}, {"continue", TT_KW_CONTINUE} }; tokeniser_t* tokenise(char* file) { tokeniser_t* tz = malloc(sizeof(tokeniser_t)); tz->loc.file = file; tz->loc.line = 1; tz->loc.col = 1; FILE* f = fopen(file, "r"); if (!f) { printf("Could not open file %s\n", file); return NULL; } char c; while ((c = fgetc(f)) != EOF) { switch(c) { case '\t': case '\r': case ' ':{ tz->loc.col += 1; } break; case '\n': { tz->loc.col = 1; tz->loc.line++; } break; case '"': { loc_t loc = tz->loc; int size = 256; int i = 0; char* buf = malloc(size * sizeof(char)); while ((c = fgetc(f)) != EOF) { tz->loc.col++; if (size <= strlen(buf) - 1) { buf = realloc(buf, size *= 2); } if (c == '\n') { mcl_log_loc(ERROR, &tz->loc, "No newlines in strings"); return NULL; } if (c == '"') break; if (c == '\\') { switch (c = fgetc(f)) { case 'n': c = '\n'; break; case '\\': break; } } buf[i++] = c; } TZ_TOK_PUSH(loc, TT_STR, buf); } break; case '\'': { loc_t loc = tz->loc; bool escape = false; char c = fgetc(f); tz->loc.col += 1; char* buf = malloc(1 * sizeof(char)); if (c == '\\') { c = fgetc(f); tz->loc.col += 1; switch (c) { case 'n': *buf = '\n'; break; default: mcl_log_loc(ERROR, &tz->loc, "Unknown escape: \\%c\n", c); return NULL; } } else { tz->loc.col += 1; *buf = c; } c = fgetc(f); if (c != '\'') { mcl_log_loc(ERROR, &tz->loc, "Expected \"'\" found \"%c\"", c); return NULL; } TZ_TOK_PUSH(loc, TT_CHR, buf); } case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': case 'd': case 'D': case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': case 'h': case 'H': case 'i': case 'I': case 'j': case 'J': case 'k': case 'K': case 'l': case 'L': case 'm': case 'M': case 'n': case 'N': case 'o': case 'O': case 'p': case 'P': case 'q': case 'Q': case 'r': case 'R': case 's': case 'S': case 't': case 'T': case 'u': case 'U': case 'v': case 'V': case 'w': case 'W': case 'y': case 'Y': case 'z': case 'Z': case '_': { loc_t loc = tz->loc; int size = 256; int i = 1; char* buf = malloc(size * sizeof(char)); buf[0] = c; while ((c = fgetc(f)) != EOF) { if (!( (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c == '_') )) { tz->loc.col++; ungetc(c, f); break; }; if (size <= strlen(buf) - 1) { buf = realloc(buf, size *= 2); } tz->loc.col++; buf[i++] = c; } bool found = false; for (int i = 0; i < sizeof(KEYWORDS)/sizeof(KEYWORDS[0]); i++) { if (strcmp(buf, KEYWORDS[i].k) == 0) { TZ_TOK_PUSH(loc, KEYWORDS[i].v, NULL); found = true; } } if (!found) TZ_TOK_PUSH(loc, TT_IDENT, buf); } break; case '}': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; case '{': { TZ_TOK_PUSH(tz->loc, TT_CURLY_L, NULL); tz->loc.col++; } break; case ']': { TZ_TOK_PUSH(tz->loc, TT_BRACK_R, NULL); tz->loc.col++; } break; case '[': { TZ_TOK_PUSH(tz->loc, TT_BRACK_L, NULL); tz->loc.col++; } break; case ')': { TZ_TOK_PUSH(tz->loc, TT_PAREN_R, NULL); tz->loc.col++; } break; case '(': { TZ_TOK_PUSH(tz->loc, TT_PAREN_L, NULL); tz->loc.col++; } break; case ':': { TZ_TOK_PUSH(tz->loc, TT_COLON, NULL); tz->loc.col++; } break; case ';': { TZ_TOK_PUSH(tz->loc, TT_SEMI, NULL); tz->loc.col++; } break; case ',': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; case '.': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; case '&': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; case '*': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; case '+': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; case '-': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; case '/': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; case '|': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; case '=': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; case '<': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; case '>': { TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); tz->loc.col++; } break; } } return tz; }