/*
 * Parse the whole of `s` as an integer literal and store the value in `*out`.
 *
 * Accepted forms: one optional '+' or '-' followed by either
 *   - "0b" / "0B" and at least one binary digit, or
 *   - a decimal, leading-0 octal, or "0x"/"0X" hexadecimal number
 *     (these are delegated to strtoll with base 0).
 *
 * Returns 0 on success. Returns -1 and leaves `*out` untouched when `s` or
 * `out` is NULL, when `s` is empty or only a sign, when any character is not
 * part of the literal, or when the value does not fit in an int.
 */
int parse_int(const char* s, int* out) {
    if (s == NULL || out == NULL) {
        return -1;
    }

    /* ----- handle optional sign ----- */
    int sign = 1;
    if (*s == '+' || *s == '-') {
        if (*s == '-') {
            sign = -1;
        }
        ++s;
    }

    /*
     * The literal proper must start with a digit. This rejects "", a lone
     * sign, and inputs such as "+-5" or " 5" that strtoll would otherwise
     * accept because it skips whitespace and consumes a sign of its own.
     */
    if (*s < '0' || *s > '9') {
        return -1;
    }

    long long magnitude = 0;

    if (s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
        /* ----- binary special-case (strtoll has no portable "0b") ----- */
        const long long limit = (long long)INT_MAX + 1; /* == |INT_MIN| */
        const char* p = s + 2;
        if (*p == '\0') {
            return -1;
        } /* "0b" but no digits */

        for (; *p; ++p) {
            if (*p != '0' && *p != '1') {
                return -1;
            } /* stray char => error */
            magnitude = magnitude * 2 + (*p - '0');
            /* bail out early so the accumulator itself can never overflow */
            if (magnitude > limit) {
                return -1;
            }
        }
    } else {
        /* ----- all other radices via strtoll ----- */
        char* end;
        /*
         * long long is at least 64 bits, so an out-of-range literal
         * saturates to a value the int range check below still rejects.
         */
        magnitude = strtoll(s, &end, 0);
        if (*end != '\0') {
            return -1;
        } /* trailing junk */
    }

    long long val = sign * magnitude;
    if (val < INT_MIN || val > INT_MAX) {
        return -1;
    }

    *out = (int)val;
    return 0;
}
100644 --- a/test.mrph +++ b/test.mrph @@ -2,4 +2,6 @@ fn main with void returns int do 34 35 add __print__ + + "hewo world" println end