Uwu nya lexer has bween made

:3
This commit is contained in:
Gvidas Juknevičius 2024-07-24 21:06:48 +03:00
parent 5819105c81
commit 6322f2f93c
Signed by: MCorange
GPG Key ID: 12B1346D720B7FBB
8 changed files with 304 additions and 5 deletions

View File

@ -9,21 +9,21 @@ typedef struct mcl_da_##name##_s { \
size_t count; \
size_t capacity; \
} mcl_da_##name##_t; \
type mcl_da_##name##_pop(mcl_da_##name##_t* da); \
type* mcl_da_##name##_pop(mcl_da_##name##_t* da); \
void mcl_da_##name##_push(mcl_da_##name##_t* da, type item); \
void mcl_da_##name##_free(mcl_da_##name##_t* da); \
#define DEFINE_DA_IMPL(name, type) \
type mcl_da_##name##_pop(mcl_da_##name##_t* da) { \
type* mcl_da_##name##_pop(mcl_da_##name##_t* da) { \
if (da->capacity <= 0 || da->count <= 0) \
return NULL; \
if (da->count < da->capacity / 2) { \
da->capacity /= 2; \
da->items = realloc(da->items, \
da->capacity * sizeof(type)); \
da->capacity * sizeof(type)); \
assert(da->items && "Out of memory"); \
} \
return da->items[(da->count--) - 1]; \
return &da->items[(da->count--) - 1]; \
} \
void mcl_da_##name##_push(mcl_da_##name##_t* da, type item) { \
if (da->capacity <= da->count) { \
@ -33,7 +33,7 @@ void mcl_da_##name##_push(mcl_da_##name##_t* da, type item) { \
da->capacity *= 2; \
} \
da->items = realloc(da->items, \
da->capacity * sizeof(type)); \
da->capacity * sizeof(type)); \
assert(da->items && "Out of memory"); \
} \
da->items[da->count++] = item; \

11
src/include/loc.h Normal file
View File

@ -0,0 +1,11 @@
#ifndef _H_MCL_LOC
#define _H_MCL_LOC
#include <stdio.h>
// A position inside a source file, attached to tokens and tracked by the tokeniser.
typedef struct loc_s {
char* file; // file name as passed to the tokeniser; the pointer is stored, not copied
size_t col; // column within the current line
size_t line; // line within the file
} loc_t;
#endif

40
src/include/token.h Normal file
View File

@ -0,0 +1,40 @@
#ifndef _H_MCL_TOKEN
#define _H_MCL_TOKEN
#include "loc.h"
// Kinds of token the tokeniser can produce.
// Enumerator order determines the numeric values, so append new kinds at the end.
typedef enum token_type_e {
TT_IDENT, // identifier: letters, digits and '_' (text holds the name)
TT_STR, // string literal "..." (text holds the contents without quotes)
TT_CHR, // character literal '.' (text points at the character)
TT_CURLY_R, // }
TT_CURLY_L, // {
TT_BRACK_R, // ]
TT_BRACK_L, // [
TT_PAREN_R, // )
TT_PAREN_L, // (
TT_COLON, // :
TT_SEMI, // ;
TT_COMMA, // ,
TT_DOT, // .
TT_AMP, // &
TT_STAR, // *
TT_PLUS, // +
TT_DASH, // -
TT_FSLASH, // /
TT_BAR, // |
TT_EQ, // =
TT_LT, // <
TT_GT, // >
} token_type_t;
// A single lexed token.
typedef struct token_s {
token_type_t type; // which kind of token this is
char* text; // lexeme text for TT_IDENT / TT_STR / TT_CHR; NULL for punctuation tokens
loc_t loc; // source position recorded when the token was pushed
} token_t;
// Returns a printable representation of a token.
// Ownership of the result varies by token type: identifiers return the token's own
// text pointer, string and character tokens return a freshly malloc'd buffer, and
// punctuation tokens return a string literal. Callers must only free() the result
// for TT_STR and TT_CHR.
char* token_to_string(token_t* tt);
#endif

View File

@ -1,5 +1,20 @@
#ifndef _H_MCL_TOKENSIER
#define _H_MCL_TOKENSIER
#include <stdlib.h>
#include <assert.h>
#include "dyn_arr.h"
#include "loc.h"
#include "token.h"
// Declares the dynamic-array type mcl_da_token_t (elements are token_t) together
// with its mcl_da_token_push / mcl_da_token_pop / mcl_da_token_free prototypes.
DEFINE_DA(token, token_t)
// Result and working state of tokenise().
typedef struct tokeniser_s {
mcl_da_token_t tokens; // tokens pushed by tokenise(), in the order they were read
loc_t loc; // current read position, updated as tokenise() consumes characters
} tokeniser_t;
// Reads the file at path `file` and splits it into tokens.
// Returns a heap-allocated tokeniser holding the tokens, or NULL if the file
// cannot be opened or a lexing error is reported.
// The `file` pointer is stored in the recorded locations, so it must outlive the result.
tokeniser_t* tokenise(char* file);
#endif

View File

@ -3,15 +3,31 @@
#include <assert.h>
#include "cliargs.h"
#include "dyn_arr.h"
#include "token.h"
#include "tokeniser.h"
// Program entry point: parses the command line, then tokenises every input file
// and prints each token as "file:line:col: text".
// Returns 1 as soon as one input fails to tokenise, 0 otherwise.
int main(int argc, char** argv) {
// NOTE(review): the result is dereferenced without a NULL check; confirm that
// parse_cliargs never returns NULL.
cliargs_t* cliargs = parse_cliargs(argc, argv);
printf("Hewo world :33\n");
printf("Output file: %s\n", cliargs->output);
// Visit each input path; the macro binds the current element to `file`.
MCL_DA_FOR_IN(char*, &cliargs->input, file, {
printf("Input file: %s\n", file);
tokeniser_t* tokeniser = tokenise(file);
if (!tokeniser) {
printf("Failed to tokenise\n");
return 1;
}
// Dump every token produced for this file; `token` is the current token_t element.
MCL_DA_FOR_IN(token_t, &tokeniser->tokens, token, {
// token_to_string() returns malloc'd memory for TT_STR / TT_CHR tokens;
// it is not freed here, and neither is `tokeniser` itself.
printf("%s:%zu:%zu: %s\n",
token.loc.file,
token.loc.line,
token.loc.col,
token_to_string(&token));
});
});
return 0;
}

61
src/token.c Normal file
View File

@ -0,0 +1,61 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "token.h"
/**
 * Renders a token as human-readable text.
 *
 * Ownership of the returned pointer depends on the token type:
 *   - TT_IDENT:        returns t->text itself (owned by the token).
 *   - TT_STR / TT_CHR: returns a newly malloc'd, NUL-terminated buffer that the
 *                      caller should free(); NULL if the allocation fails.
 *   - punctuation:     returns a string literal (must not be freed or modified).
 *   - anything else:   returns the literal "<unknown>".
 *
 * @param t token to render; must not be NULL.
 * @return the textual form as described above.
 */
char* token_to_string(token_t* t) {
    switch (t->type) {
        case TT_IDENT:
            return t->text;
        case TT_STR: {
            /* A token without text is rendered as an empty string literal. */
            const char* text = t->text ? t->text : "";
            size_t len = strlen(text) + 3; /* opening quote + closing quote + NUL */
            char* buf = malloc(len);
            if (!buf) {
                return NULL;
            }
            snprintf(buf, len, "\"%s\"", text);
            return buf;
        }
        case TT_CHR: {
            char* buf = malloc(4); /* quote + character + quote + NUL */
            if (!buf) {
                return NULL;
            }
            snprintf(buf, 4, "'%c'", t->text ? *t->text : ' ');
            return buf;
        }
        case TT_CURLY_R: return "}";
        case TT_CURLY_L: return "{";
        case TT_BRACK_R: return "]";
        case TT_BRACK_L: return "[";
        case TT_PAREN_R: return ")";
        case TT_PAREN_L: return "(";
        case TT_COLON:   return ":";
        case TT_SEMI:    return ";";
        case TT_COMMA:   return ",";
        case TT_DOT:     return ".";
        case TT_AMP:     return "&";
        case TT_STAR:    return "*";
        case TT_PLUS:    return "+";
        case TT_DASH:    return "-";
        case TT_FSLASH:  return "/";
        case TT_BAR:     return "|";
        case TT_EQ:      return "=";
        case TT_LT:      return "<";
        case TT_GT:      return ">";
    }
    /* No `default:` above so the compiler can still warn about unhandled
     * enumerators; this return covers out-of-range values, which previously
     * fell off the end of the function (undefined behaviour). */
    return "<unknown>";
}

View File

@ -1,4 +1,154 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "tokeniser.h"
#include "dyn_arr.h"
#include "token.h"
// Emits the function definitions for the token dynamic array (mcl_da_token_*).
DEFINE_DA_IMPL(token, token_t)
// Appends a token built from (_loc, _type, _text) to the current tokeniser.
// Expects a variable named `tz` of type tokeniser_t* to be in scope at the call site.
#define TZ_TOK_PUSH(_loc, _type, _text) mcl_da_token_push(&tz->tokens, (token_t){.loc = (_loc), .type = (_type), .text=(_text)})
tokeniser_t* tokenise(char* file) {
tokeniser_t* tz = malloc(sizeof(tokeniser_t));
tz->loc.file = file;
FILE* f = fopen(file, "r");
if (!f) {
printf("Could not open file %s\n", file);
return NULL;
}
char c;
while ((c = fgetc(f)) != EOF) {
tz->loc.col++;
switch(c) {
case ' ':
case '\t':
case '\r': break;
case '\n': {
tz->loc.col = 0;
tz->loc.line++;
} break;
case '"': {
int size = 256;
int i = 0;
char* buf = malloc(size * sizeof(char));
while ((c = fgetc(f)) != EOF) {
if (size <= strlen(buf) - 1) {
buf = realloc(buf, size *= 2);
}
if (c == '\n') {
printf("ERROR: Newline in string\n");
return NULL;
}
tz->loc.col++;
if (c == '"') break;
if (c == '\\') {
switch (c = fgetc(f)) {
case 'n': c = '\n'; break;
case '\\': break;
}
}
buf[i++] = c;
}
TZ_TOK_PUSH(tz->loc, TT_STR, buf);
} break;
case '\'': {
bool escape = false;
char c = fgetc(f);
char* buf = malloc(1 * sizeof(char));
if (c == '\\') {
c = fgetc(f);
switch (c) {
case 'n': *buf = '\n'; break;
default:
printf("ERROR: Unknown escape: \\%c\n", c);
return NULL;
}
} else {
*buf = c;
}
TZ_TOK_PUSH(tz->loc, TT_CHR, buf);
}
case 'a': case 'A':
case 'b': case 'B':
case 'c': case 'C':
case 'd': case 'D':
case 'e': case 'E':
case 'f': case 'F':
case 'g': case 'G':
case 'h': case 'H':
case 'i': case 'I':
case 'j': case 'J':
case 'k': case 'K':
case 'l': case 'L':
case 'm': case 'M':
case 'n': case 'N':
case 'o': case 'O':
case 'p': case 'P':
case 'q': case 'Q':
case 'r': case 'R':
case 's': case 'S':
case 't': case 'T':
case 'u': case 'U':
case 'v': case 'V':
case 'w': case 'W':
case 'y': case 'Y':
case 'z': case 'Z':
case '_': {
int size = 256;
int i = 1;
char* buf = malloc(size * sizeof(char));
buf[0] = c;
while ((c = fgetc(f)) != EOF) {
if (!( (c >= 'A' && c <= 'Z') ||
(c >= 'a' && c <= 'z') ||
(c >= '0' && c <= '9') ||
(c == '_')
)) {
ungetc(c, f);
break;
};
if (size <= strlen(buf) - 1) {
buf = realloc(buf, size *= 2);
}
tz->loc.col++;
buf[i++] = c;
}
TZ_TOK_PUSH(tz->loc, TT_IDENT, buf);
} break;
case '}': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
case '{': TZ_TOK_PUSH(tz->loc, TT_CURLY_L, NULL); break;
case ']': TZ_TOK_PUSH(tz->loc, TT_BRACK_R, NULL); break;
case '[': TZ_TOK_PUSH(tz->loc, TT_BRACK_L, NULL); break;
case ')': TZ_TOK_PUSH(tz->loc, TT_PAREN_R, NULL); break;
case '(': TZ_TOK_PUSH(tz->loc, TT_PAREN_L, NULL); break;
case ':': TZ_TOK_PUSH(tz->loc, TT_COLON, NULL); break;
case ';': TZ_TOK_PUSH(tz->loc, TT_SEMI, NULL); break;
case ',': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
case '.': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
case '&': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
case '*': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
case '+': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
case '-': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
case '/': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
case '|': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
case '=': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
case '<': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
case '>': TZ_TOK_PUSH(tz->loc, TT_CURLY_R, NULL); break;
}
}
return tz;
}

6
test.mcl Normal file
View File

@ -0,0 +1,6 @@
main :: fn(argc: i32, argv: string[]) -> i32 {
println!("Hello world!\n");
}