commit 77a1e74526ac304db4dc6abcf749551040a8146f Author: MCorange Date: Mon Jun 23 22:01:10 2025 +0300 Initial diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..9ca6f4b --- /dev/null +++ b/.clang-format @@ -0,0 +1,236 @@ +--- +#Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCaseColons: false +AlignEscapedNewlines: Left +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: Yes +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterExternBlock: false + AfterFunction: false + AfterNamespace: false + 
AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAfterAttributes: Never +BreakAfterJavaFieldAnnotations: false +BreakArrays: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: Always +BreakBeforeBraces: Attach +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +ColumnLimit: 160 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: false +IndentCaseLabels: false +IndentExternBlock: AfterExternBlock +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertBraces: true +InsertNewlineAtEOF: false +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: 
true +KeepEmptyLinesAtEOF: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: All +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PackConstructorInitializers: BinPack +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Left +PPIndentWidth: -1 +QualifierAlignment: Leave +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Always +ShortNamespaceLines: 1 +SortIncludes: Never +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInContainerLiterals: 
true
+SpacesInLineCommentPrefix:
+  Minimum: 1
+  Maximum: -1
+SpacesInParens: Never
+SpacesInParensOptions:
+  InCStyleCasts: false
+  InConditionalStatements: false
+  InEmptyParentheses: false
+  Other: false
+SpacesInSquareBrackets: false
+Standard: Latest
+StatementAttributeLikeMacros:
+  - Q_EMIT
+StatementMacros:
+  - Q_UNUSED
+  - QT_REQUIRE_VERSION
+TabWidth: 4
+UseTab: Never
+VerilogBreakBetweenInstancePorts: true
+WhitespaceSensitiveMacros:
+  - BOOST_PP_STRINGIZE
+  - CF_SWIFT_NAME
+  - NS_SWIFT_NAME
+  - PP_STRINGIZE
+  - STRINGIZE
+...
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b838b9e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.cache/clangd/index
+build
+compile_commands.json
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..7ab6c87
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,42 @@
+BIN=cmplx
+
+BUILD_DIR?=build
+
+COM_FLAGS = -fPIC -Isrc/include $(shell pkg-config --cflags libcx)
+CC_FLAGS = -std=c23 -ggdb
+CXX_FLAGS = -std=c++23 -nostdinc++ -ggdb
+LD_FLAGS = -nostdlib++ -lsupc++ -ggdb $(shell pkg-config --libs libcx)
+CC=clang
+CXX=clang
+LD=clang
+AR=ar
+
+cxx_sources=$(wildcard src/*.cpp)
+c_sources=$(wildcard src/*.c)
+objects=$(patsubst src/%.cpp,$(BUILD_DIR)/obj/%.cpp.o,$(cxx_sources)) $(patsubst src/%.c,$(BUILD_DIR)/obj/%.c.o,$(c_sources))
+
+# `all` and `clean` name actions rather than files.
+.PHONY: all clean
+
+all: $(BUILD_DIR)/$(BIN) compile_commands.json
+
+
+$(BUILD_DIR)/$(BIN): $(objects)
+	$(LD) -o $@ $^ $(LD_FLAGS)
+
+# -f keeps `clean` from failing when nothing has been built yet.
+clean:
+	rm -rf $(BUILD_DIR)/obj/
+
+# Sources are collected from src/ above, so match that directory explicitly.
+$(BUILD_DIR)/obj/%.cpp.o: src/%.cpp
+	@mkdir -p $(dir $@)
+	$(CXX) -c -o $@ $< $(CXX_FLAGS) $(COM_FLAGS)
+
+$(BUILD_DIR)/obj/%.c.o: src/%.c
+	@mkdir -p $(dir $@)
+	$(CC) -c -o $@ $< $(CC_FLAGS) $(COM_FLAGS)
+
+
+compile_commands.json:
+	compiledb -n make
diff --git a/src/dynarray.c b/src/dynarray.c
new file mode 100644
index 0000000..6e41fff
--- /dev/null
+++ b/src/dynarray.c
@@ -0,0 +1,131 @@
+
+#include "dynarray.h"
+
+#include <stdint.h> // SIZE_MAX
+
+/*
+
+Dynamic Array
+
+A dynarray has three hidden fields of type `size_t` stored in its header:
+    - capacity: size in `stride`-sized units of the allocated buffer.
+    - length: the number of `stride`-sized units currently filled.
+    - stride: the sizeof the datatype being stored in the dynarray.
+
+To get the ith element in the array, you can use bracket notation (`arr[i]`),
+or the `dynarray_get` method which does bounds checking.
+
+To set the ith element of the array, use either bracket notation
+(`arr[i] = x;`), or the `dynarray_set` method which does bounds checking.
+
+NOTE: `dynarray_get` and `dynarray_set` are not declared in dynarray.h as of
+this commit; only bracket notation is available.
+*/
+
+// Returns a pointer to the start of a new dynarray (after the header) which
+// has room for `init_cap` units of `stride` bytes, or NULL if the requested
+// size does not fit in `size_t` or the allocation fails.
+void *_dynarray_create(size_t init_cap, size_t stride) {
+    const size_t header_size = DYNARRAY_FIELDS * sizeof(size_t);
+
+    // Reject requests whose total byte count would wrap around.
+    if (stride != 0 && init_cap > (SIZE_MAX - header_size) / stride) {
+        return NULL;
+    }
+
+    size_t *arr = malloc(header_size + init_cap * stride);
+    if (arr == NULL) {
+        return NULL;
+    }
+
+    arr[CAPACITY] = init_cap;
+    arr[LENGTH] = 0;
+    arr[STRIDE] = stride;
+    return arr + DYNARRAY_FIELDS;
+}
+
+// Frees a dynarray returned by `_dynarray_create`. `arr` points just past the
+// header, so step back over the header fields to recover the pointer that
+// malloc handed out. Passing NULL is a no-op.
+void _dynarray_destroy(void *arr) {
+    if (arr == NULL) {
+        return;
+    }
+    free((size_t *)arr - DYNARRAY_FIELDS);
+}
+
+// Returns the dynarray's field which is specified by passing
+// one of CAPACITY, LENGTH, STRIDE.
+size_t _dynarray_field_get(void *arr, size_t field) {
+    return ((size_t *)arr - DYNARRAY_FIELDS)[field];
+}
+
+// Overwrites the header field selected by `field` (one of CAPACITY, LENGTH,
+// STRIDE) with `value`.
+void _dynarray_field_set(void *arr, size_t field, size_t value) {
+    ((size_t *)arr - DYNARRAY_FIELDS)[field] = value;
+}
+
+// Allocates a new dynarray with DYNARRAY_RESIZE_FACTOR times the capacity of
+// the one passed in (DYNARRAY_DEFAULT_CAP if it had none), copies the stored
+// elements over and frees the original.
+// Returns NULL, leaving `arr` untouched, if the new array cannot be allocated.
+void *_dynarray_resize(void *arr) {
+    const size_t old_cap = dynarray_capacity(arr);
+    const size_t length = dynarray_length(arr);
+    const size_t stride = dynarray_stride(arr);
+
+    // Multiplying a zero capacity would never grow the array, so start such
+    // arrays at the default capacity; also refuse to wrap on huge capacities.
+    size_t new_cap = DYNARRAY_DEFAULT_CAP;
+    if (old_cap != 0) {
+        if (old_cap > SIZE_MAX / DYNARRAY_RESIZE_FACTOR) {
+            return NULL;
+        }
+        new_cap = DYNARRAY_RESIZE_FACTOR * old_cap;
+    }
+
+    void *grown = _dynarray_create(new_cap, stride);
+    if (grown == NULL) {
+        return NULL;
+    }
+
+    memcpy(grown, arr, length * stride);
+    _dynarray_field_set(grown, LENGTH, length);
+    _dynarray_destroy(arr);
+    return grown;
+}
+
+// Appends a copy of the `stride`-byte element at `xptr`, growing the array
+// first when it is full. Returns the (possibly moved) array, which the caller
+// must use from then on, or NULL if growing failed; in that case the original
+// array is still valid and has not been freed.
+void *_dynarray_push(void *arr, void *xptr) {
+    if (dynarray_length(arr) >= dynarray_capacity(arr)) {
+        void *grown = _dynarray_resize(arr);
+        if (grown == NULL) {
+            return NULL;
+        }
+        arr = grown;
+    }
+
+    const size_t length = dynarray_length(arr);
+    const size_t stride = dynarray_stride(arr);
+    // Arithmetic on `void *` is a compiler extension; step in bytes via `char *`.
+    memcpy((char *)arr + length * stride, xptr, stride);
+    _dynarray_field_set(arr, LENGTH, length + 1);
+    return arr;
+}
+
+// Removes the last element in the array, but copies it to `*dest` first.
+// Does nothing when the array is empty: there is no element to copy, and
+// decrementing a zero length would wrap around.
+void _dynarray_pop(void *arr, void *dest) {
+    const size_t length = dynarray_length(arr);
+    if (length == 0) {
+        return;
+    }
+    const size_t stride = dynarray_stride(arr);
+    memcpy(dest, (char *)arr + (length - 1) * stride, stride);
+    _dynarray_field_set(arr, LENGTH, length - 1);
+}
diff --git a/src/include/dynarray.h b/src/include/dynarray.h
new file mode 100644
index 0000000..311dc10
--- /dev/null
+++ b/src/include/dynarray.h
@@ -0,0 +1,51 @@
+// Taken from https://github.com/eignnx/dynarray
+
+#ifndef _H_DYNARRAY
+#define _H_DYNARRAY
+
+#include <stdlib.h> // malloc
+#include <string.h> // memcpy
+
+/* Structure of a dynarray:
+ *  size_t capacity
+ *  size_t length
+ *  size_t stride
+ *  void *memory
+ */
+
+enum { CAPACITY, LENGTH, STRIDE, DYNARRAY_FIELDS };
+
+void* _dynarray_create(size_t init_cap, size_t stride);
+void _dynarray_destroy(void* arr);
+
+size_t _dynarray_field_get(void* arr, size_t field);
+void _dynarray_field_set(void* arr, size_t field, size_t value);
+
+void* _dynarray_resize(void* arr);
+
+void* _dynarray_push(void* arr, void* xptr);
+void _dynarray_pop(void* arr, void* dest);
+
+#define DYNARRAY_DEFAULT_CAP 1
+#define DYNARRAY_RESIZE_FACTOR 2
+
+#define dynarray_create(type) _dynarray_create(DYNARRAY_DEFAULT_CAP, sizeof(type))
+#define dynarray_create_prealloc(type, capacity) _dynarray_create((capacity), sizeof(type))
+#define dynarray_destroy(arr) _dynarray_destroy(arr)
+
+// Both push macros reassign `arr`, because pushing may move the array.
+// `dynarray_push` needs an lvalue `x`; use `dynarray_push_rval` for expressions.
+#define dynarray_push(arr, x) ((arr) = _dynarray_push((arr), &(x)))
+#define dynarray_push_rval(arr, x) \
+    do { \
+        __auto_type temp = (x); \
+        (arr) = _dynarray_push((arr), &temp); \
+    } while (0)
+
+#define dynarray_pop(arr, xptr) _dynarray_pop(arr, xptr)
+
+#define 
dynarray_capacity(arr) _dynarray_field_get(arr, CAPACITY)
+#define dynarray_length(arr) _dynarray_field_get(arr, LENGTH)
+#define dynarray_stride(arr) _dynarray_field_get(arr, STRIDE)
+
+#endif // _H_DYNARRAY
diff --git a/src/include/loc.h b/src/include/loc.h
new file mode 100644
index 0000000..5eacfcf
--- /dev/null
+++ b/src/include/loc.h
@@ -0,0 +1,11 @@
+#ifndef _H_MORPH_LOC
+#define _H_MORPH_LOC
+
+// Position in a source file; tokenise_string starts counting line and col at 0.
+typedef struct loc_s {
+    const char* file;
+    int line;
+    int col;
+} loc_t;
+
+#endif // !_H_MORPH_LOC
diff --git a/src/include/parser.h b/src/include/parser.h
new file mode 100644
index 0000000..e69de29
diff --git a/src/include/token.h b/src/include/token.h
new file mode 100644
index 0000000..002d110
--- /dev/null
+++ b/src/include/token.h
@@ -0,0 +1,118 @@
+#ifndef _H_MORPH_TOKEN
+#define _H_MORPH_TOKEN
+
+#include "loc.h"
+#include <stddef.h>    // size_t
+#include <sys/types.h> // ssize_t
+
+typedef enum token_type_e {
+    TT_NONE = 0,
+    TT_KW,
+    TT_OP,
+    TT_PUSH_STR,
+    TT_PUSH_CSTR,
+    TT_PUSH_MEM,
+    TT_PUSH_CHAR,
+    TT_PUSH_INT,
+    TT_PUSH_FLOAT,
+} token_type_t;
+
+typedef enum kw_type_e {
+    // Include will be tokeniser level
+    KW_NONE = 0,
+    KW_FN,
+    KW_DO,
+    KW_END,
+    KW_WITH,
+    KW_RETURNS,
+    KW_STRUCT,
+    KW_ENUM,
+    KW_IF,
+    KW_ELSE,
+    KW_WHILE,
+    KW_CONST,
+    KW_MEMORY
+} kw_type_t;
+
+typedef enum op_type_e {
+    OP_NONE = 0,
+    // Math
+    OP_ADD,
+    OP_SUB,
+    OP_MUL,
+    OP_DIV,
+    OP_MOD,
+    // Comparison
+    OP_EQ,
+    OP_GT,
+    OP_LT,
+    OP_GE,
+    OP_LE,
+    OP_NE,
+
+    // Bit manipulation
+    OP_SHR,
+    OP_SHL,
+    OP_OR,
+    OP_AND,
+    OP_NOT,
+
+    // stack ops
+    OP_DUP,
+    OP_SWAP,
+    OP_DROP,
+    OP_OVER,
+    OP_ROT,
+
+    // memory
+    // NOTE: Even if you load a 1 byte value into the stack,
+    // it will still take up the full 8 bytes (or 4, if you're using a 32 bit system)
+    OP_LOAD8,
+    OP_STORE8,
+    OP_LOAD16,
+    OP_STORE16,
+    OP_LOAD32,
+    OP_STORE32,
+    OP_LOAD64,
+    OP_STORE64,
+
+    // syscalls
+    OP_SYSCALL0,
+    OP_SYSCALL1,
+    OP_SYSCALL2,
+    OP_SYSCALL3,
+    OP_SYSCALL4,
+    OP_SYSCALL5,
+    OP_SYSCALL6,
+
+    // Builtins/Internals
+    OP_ARGC,
+    OP_ARGV,
+    OP_CAST_PTR,
+    OP_CAST_INT,
+    OP_CAST_BOOL,
+    OP_HERE,
+    OP_PRINT,
+} op_type_t;
+
+typedef struct token_s {
+    token_type_t type;
+    loc_t loc;
+
+    union {
+        op_type_t op_type;
+        kw_type_t kw_type;
+        // NOTE: PUSH_CSTR and PUSH_STR will use the same data,
+        // it will just output slightly different assembly
+        const char* str_v;
+        // Not const: a const member would make token_t impossible to assign.
+        char char_v;
+        ssize_t int_v;
+        // NOTE: For PUSH_MEM. It will push the offset of the memory,
+        // that is assigned for that specific memory, from the `memory` label.
+        size_t offset_v;
+        double float_v;
+    };
+} token_t;
+
+#endif // _H_MORPH_TOKEN
diff --git a/src/include/tokeniser.h b/src/include/tokeniser.h
new file mode 100644
index 0000000..5cc47ae
--- /dev/null
+++ b/src/include/tokeniser.h
@@ -0,0 +1,9 @@
+#ifndef _H_MORPH_TOKENISER
+#define _H_MORPH_TOKENISER
+#include <token.h>
+
+// Splits `str` into whitespace-separated words, tracking their location in `file_path`.
+// NOTE: token construction is not implemented yet, so this currently always returns NULL.
+token_t* tokenise_string(char* file_path, char* str);
+
+#endif // !_H_MORPH_TOKENISER
diff --git a/src/include/util.h b/src/include/util.h
new file mode 100644
index 0000000..7685c87
--- /dev/null
+++ b/src/include/util.h
@@ -0,0 +1,6 @@
+#ifndef _H_MORPH_UTIL
+#define _H_MORPH_UTIL
+
+// Reads the whole file into a malloc'd, NUL-terminated buffer the caller must free; NULL on failure.
+char* read_to_string(const char* filename);
+#endif // !_H_MORPH_UTIL
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..e2c0387
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,3 @@
+
+
+int main(int argc, const char **argv) { return 0; }
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..e69de29
diff --git a/src/tokeniser.c b/src/tokeniser.c
new file mode 100644
index 0000000..3f92f3d
--- /dev/null
+++ b/src/tokeniser.c
@@ -0,0 +1,72 @@
+#include <stdlib.h>
+#include <string.h>
+#include <dynarray.h>
+#include <token.h>
+#include <tokeniser.h>
+
+// Returns non-zero for the characters that separate words in the source text.
+static int is_word_separator(char c) {
+    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
+}
+
+// Walks `str`, keeping `loc` in step with the text and isolating each
+// whitespace-separated word.
+// NOTE: turning words into tokens is not implemented yet, so nothing is pushed
+// onto the token array and the function always returns NULL.
+token_t* tokenise_string(char* file_path, char* str) {
+    if (str == NULL) {
+        return NULL;
+    }
+
+    const size_t str_len = strlen(str);
+    loc_t loc = {0};
+    loc.file = file_path;
+
+    token_t* tokens = dynarray_create(token_t);
+    if (tokens == NULL) {
+        return NULL;
+    }
+
+    for (size_t i = 0; i < str_len; i++) {
+        switch (str[i]) {
+        case (' '):
+        case ('\t'):
+        case ('\r'): {
+            loc.col += 1;
+            continue;
+        }
+        case ('\n'): {
+            loc.col = 0;
+            loc.line += 1;
+            continue;
+        }
+        default: {
+            // The word runs up to the next separator or the end of the input.
+            size_t word_end = i + 1;
+            while (word_end < str_len && !is_word_separator(str[word_end])) {
+                word_end += 1;
+            }
+            const size_t word_len = word_end - i;
+
+            // Size the copy from the word itself, so no fixed buffer can overflow.
+            char* word = malloc(word_len + 1);
+            if (word == NULL) {
+                dynarray_destroy(tokens);
+                return NULL;
+            }
+            memcpy(word, &str[i], word_len);
+            word[word_len] = '\0';
+
+            // TODO: classify `word` and push the resulting token onto `tokens`.
+            free(word);
+
+            loc.col += (int)word_len;
+            i = word_end - 1; // the loop increment then lands on the separator
+        }
+        }
+    }
+
+    // TODO: return `tokens` once tokens are actually produced; until then release it.
+    dynarray_destroy(tokens);
+    return NULL;
+}
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 0000000..98dbded
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <util.h>
+
+// WARN: Created with chatgpt
+// Reads the whole file at `filename` into a freshly allocated, NUL-terminated buffer.
+// Returns NULL if the file cannot be opened, measured or read, or if allocation fails.
+// The caller owns the returned buffer and must free() it.
+char* read_to_string(const char* filename) {
+    FILE* file = fopen(filename, "rb");
+    if (!file) {
+        return NULL;
+    }
+
+    char* buffer = NULL;
+    long length = -1;
+    size_t read = 0;
+
+    // Seek to the end to learn the size; ftell reports -1 when it cannot (e.g. on a pipe).
+    if (fseek(file, 0, SEEK_END) == 0) {
+        length = ftell(file);
+    }
+    if (length < 0 || fseek(file, 0, SEEK_SET) != 0) {
+        goto out;
+    }
+
+    buffer = malloc((size_t)length + 1); // +1 for null terminator :3
+    if (!buffer) {
+        goto out;
+    }
+
+    read = fread(buffer, 1, (size_t)length, file);
+    if (ferror(file)) {
+        free(buffer);
+        buffer = NULL;
+        goto out;
+    }
+    buffer[read] = '\0'; // a short read still yields a valid string
+
+out:
+    fclose(file);
+    return buffer;
+}