From 40ab06b08b36108b2b766f81ae0bf3e42e9c5534 Mon Sep 17 00:00:00 2001 From: MCorange99 Date: Thu, 14 Mar 2024 09:13:01 +0200 Subject: [PATCH] Initial --- Cargo.lock | 320 ++++++++++++++ Cargo.toml | 18 + REF.md | 55 +++ include/linux/io.mcl | 14 + include/linux/linux.mcl | 2 + include/linux/syscalls.mcl | 322 ++++++++++++++ include/std.mcl | 2 + include/types.mcl | 4 + src/cli.rs | 125 ++++++ src/compiler/mod.rs | 84 ++++ src/compiler/utils.rs | 36 ++ src/compiler/x86_64_linux_nasm/mod.rs | 485 +++++++++++++++++++++ src/compiler/x86_64_linux_nasm/utils.rs | 37 ++ src/lexer/mod.rs | 302 +++++++++++++ src/logger/colors.rs | 32 ++ src/logger/macros.rs | 106 +++++ src/logger/mod.rs | 83 ++++ src/logger/types.rs | 40 ++ src/main.rs | 47 ++ src/parser/builtin.rs | 46 ++ src/parser/mod.rs | 551 ++++++++++++++++++++++++ src/parser/precompiler.rs | 154 +++++++ src/parser/utils.rs | 100 +++++ src/types/ast/mod.rs | 148 +++++++ src/types/common.rs | 37 ++ src/types/mod.rs | 3 + src/types/token/mod.rs | 120 ++++++ test.mcl | 39 ++ 28 files changed, 3312 insertions(+) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 REF.md create mode 100644 include/linux/io.mcl create mode 100644 include/linux/linux.mcl create mode 100644 include/linux/syscalls.mcl create mode 100644 include/std.mcl create mode 100644 include/types.mcl create mode 100644 src/cli.rs create mode 100644 src/compiler/mod.rs create mode 100644 src/compiler/utils.rs create mode 100644 src/compiler/x86_64_linux_nasm/mod.rs create mode 100644 src/compiler/x86_64_linux_nasm/utils.rs create mode 100644 src/lexer/mod.rs create mode 100644 src/logger/colors.rs create mode 100644 src/logger/macros.rs create mode 100644 src/logger/mod.rs create mode 100644 src/logger/types.rs create mode 100644 src/main.rs create mode 100644 src/parser/builtin.rs create mode 100644 src/parser/mod.rs create mode 100644 src/parser/precompiler.rs create mode 100644 src/parser/utils.rs create mode 100644 
src/types/ast/mod.rs create mode 100644 src/types/common.rs create mode 100644 src/types/mod.rs create mode 100644 src/types/token/mod.rs create mode 100644 test.mcl diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..f609913 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,320 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anstream" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + +[[package]] +name = "camino" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" + +[[package]] +name = "clap" +version = "4.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b230ab84b0ffdf890d5a10abdbc8b83ae1c4918275daea1ab8801f71536b2651" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "map-macro" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb950a42259642e5a3483115aca87eebed2a64886993463af9c9739c205b8d3a" + +[[package]] +name = "mclangc-v2" +version = "0.1.0" +dependencies = [ + "anyhow", + "bitflags", + "camino", + "clap", + "lazy_static", + "map-macro", + "parse_int", + "snailquote", +] + +[[package]] +name = "num-traits" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "parse_int" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d695b79916a2c08bcff7be7647ab60d1402885265005a6658ffe6d763553c5a" +dependencies = [ + "num-traits", +] + +[[package]] +name = "proc-macro2" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "snailquote" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec62a949bda7f15800481a711909f946e1204f2460f89210eaf7f57730f88f86" +dependencies = [ + "thiserror", + "unicode_categories", +] + +[[package]] +name = "strsim" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" + +[[package]] +name = "syn" +version = "2.0.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..38e3f3e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "mclangc-v2" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0.80" +bitflags = "2.4.2" +camino = "1.1.6" +clap = { version = "4.5.2", features = ["derive"] } +lazy_static = "1.4.0" +map-macro = "0.3.0" +parse_int = "0.6.0" +# serde = { version = "1.0.197", features = ["derive"] } +# regex = "1.10.3" +snailquote = "0.3.1" diff --git 
a/REF.md b/REF.md new file mode 100644 index 0000000..3e2224a --- /dev/null +++ b/REF.md @@ -0,0 +1,55 @@ +# Reference + +```mclang +typedef str do int ptr end // [int, ptr] + +include "std.mcl" + +const sizeof(u8) 1 end +const sizeof(u16) 2 end +const sizeof(u32) 4 end +const sizeof(u64) 8 end + +structdef Foo do + buz do sizeof(u64) end + baz do sizeof(u64) end +done + +memory s_foo Foo end + +//? Comments :3 + +extern fn a with void returns void then done +inline fn b with void returns void then done +export fn c with void returns void then done + +fn puts with str returns void then drop drop done +// fn putd with int returns void then drop done + +fn main with int ptr returns int then + // 1 2 add + 69 _dbg_print + "Hewo" puts + + if 3 4 eq do + "omg what impossible!\n" + else if 1 1 eq do + "whaaaaaaaaa\n" + else + "finally, some good soup\n" + done + puts + + 10 + while dup 0 gt do + "uwu" puts + dup _dbg_print + 1 + done + +done + + +``` + + diff --git a/include/linux/io.mcl b/include/linux/io.mcl new file mode 100644 index 0000000..de2f942 --- /dev/null +++ b/include/linux/io.mcl @@ -0,0 +1,14 @@ + +fn fwrite with int ptr int returns int then + SYS_write syscall3 +done + + + +fn puts with int ptr int returns int then + STDOUT fwrite drop +done + +fn eputs with int ptr int returns int then + STDERR fwrite drop +done \ No newline at end of file diff --git a/include/linux/linux.mcl b/include/linux/linux.mcl new file mode 100644 index 0000000..dca656f --- /dev/null +++ b/include/linux/linux.mcl @@ -0,0 +1,2 @@ +include "linux/syscalls.mcl" +include "linux/io.mcl" \ No newline at end of file diff --git a/include/linux/syscalls.mcl b/include/linux/syscalls.mcl new file mode 100644 index 0000000..6b459b9 --- /dev/null +++ b/include/linux/syscalls.mcl @@ -0,0 +1,322 @@ + +// file descriptors +const STDIN 0 end +const STDOUT 1 end +const STDERR 2 end + + +// syscalls +const SYS_read 0 end +const SYS_write 1 end +const SYS_open 2 end +const SYS_close 3 end +const 
SYS_stat 4 end +const SYS_fstat 5 end +const SYS_lstat 6 end +const SYS_poll 7 end +const SYS_lseek 8 end +const SYS_mmap 9 end +const SYS_mprotect 10 end +const SYS_munmap 11 end +const SYS_brk 12 end +const SYS_rt_sigaction 13 end +const SYS_rt_sigprocmask 14 end +const SYS_rt_sigreturn 15 end +const SYS_ioctl 16 end +const SYS_pread64 17 end +const SYS_pwrite64 18 end +const SYS_readv 19 end +const SYS_writev 20 end +const SYS_access 21 end +const SYS_pipe 22 end +const SYS_select 23 end +const SYS_sched_yield 24 end +const SYS_mremap 25 end +const SYS_msync 26 end +const SYS_mincore 27 end +const SYS_madvise 28 end +const SYS_shmget 29 end +const SYS_shmat 30 end +const SYS_shmctl 31 end +const SYS_dup 32 end +const SYS_dup2 33 end +const SYS_pause 34 end +const SYS_nanosleep 35 end +const SYS_getitimer 36 end +const SYS_alarm 37 end +const SYS_setitimer 38 end +const SYS_getpid 39 end +const SYS_sendfile 40 end +const SYS_socket 41 end +const SYS_connect 42 end +const SYS_accept 43 end +const SYS_sendto 44 end +const SYS_recvfrom 45 end +const SYS_sendmsg 46 end +const SYS_recvmsg 47 end +const SYS_shutdown 48 end +const SYS_bind 49 end +const SYS_listen 50 end +const SYS_getsockname 51 end +const SYS_getpeername 52 end +const SYS_socketpair 53 end +const SYS_setsockopt 54 end +const SYS_getsockopt 55 end +const SYS_clone 56 end +const SYS_fork 57 end +const SYS_vfork 58 end +const SYS_execve 59 end +const SYS_exit 60 end +const SYS_wait4 61 end +const SYS_kill 62 end +const SYS_uname 63 end +const SYS_semget 64 end +const SYS_semop 65 end +const SYS_semctl 66 end +const SYS_shmdt 67 end +const SYS_msgget 68 end +const SYS_msgsnd 69 end +const SYS_msgrcv 70 end +const SYS_msgctl 71 end +const SYS_fcntl 72 end +const SYS_flock 73 end +const SYS_fsync 74 end +const SYS_fdatasync 75 end +const SYS_truncate 76 end +const SYS_ftruncate 77 end +const SYS_getdents 78 end +const SYS_getcwd 79 end +const SYS_chdir 80 end +const SYS_fchdir 81 end +const SYS_rename 82 
end +const SYS_mkdir 83 end +const SYS_rmdir 84 end +const SYS_creat 85 end +const SYS_link 86 end +const SYS_unlink 87 end +const SYS_symlink 88 end +const SYS_readlink 89 end +const SYS_chmod 90 end +const SYS_fchmod 91 end +const SYS_chown 92 end +const SYS_fchown 93 end +const SYS_lchown 94 end +const SYS_umask 95 end +const SYS_gettimeofday 96 end +const SYS_getrlimit 97 end +const SYS_getrusage 98 end +const SYS_sysinfo 99 end +const SYS_times 100 end +const SYS_ptrace 101 end +const SYS_getuid 102 end +const SYS_syslog 103 end +const SYS_getgid 104 end +const SYS_setuid 105 end +const SYS_setgid 106 end +const SYS_geteuid 107 end +const SYS_getegid 108 end +const SYS_setpgid 109 end +const SYS_getppid 110 end +const SYS_getpgrp 111 end +const SYS_setsid 112 end +const SYS_setreuid 113 end +const SYS_setregid 114 end +const SYS_getgroups 115 end +const SYS_setgroups 116 end +const SYS_setresuid 117 end +const SYS_getresuid 118 end +const SYS_setresgid 119 end +const SYS_getresgid 120 end +const SYS_getpgid 121 end +const SYS_setfsuid 122 end +const SYS_setfsgid 123 end +const SYS_getsid 124 end +const SYS_capget 125 end +const SYS_capset 126 end +const SYS_rt_sigpending 127 end +const SYS_rt_sigtimedwait 128 end +const SYS_rt_sigqueueinfo 129 end +const SYS_rt_sigsuspend 130 end +const SYS_sigaltstack 131 end +const SYS_utime 132 end +const SYS_mknod 133 end +const SYS_uselib 134 end +const SYS_personality 135 end +const SYS_ustat 136 end +const SYS_statfs 137 end +const SYS_fstatfs 138 end +const SYS_sysfs 139 end +const SYS_getpriority 140 end +const SYS_setpriority 141 end +const SYS_sched_setparam 142 end +const SYS_sched_getparam 143 end +const SYS_sched_setscheduler 144 end +const SYS_sched_getscheduler 145 end +const SYS_sched_get_priority_max 146 end +const SYS_sched_get_priority_min 147 end +const SYS_sched_rr_get_interval 148 end +const SYS_mlock 149 end +const SYS_munlock 150 end +const SYS_mlockall 151 end +const SYS_munlockall 152 end +const 
SYS_vhangup 153 end +const SYS_modify_ldt 154 end +const SYS_pivot_root 155 end +const SYS__sysctl 156 end +const SYS_prctl 157 end +const SYS_arch_prctl 158 end +const SYS_adjtimex 159 end +const SYS_setrlimit 160 end +const SYS_chroot 161 end +const SYS_sync 162 end +const SYS_acct 163 end +const SYS_settimeofday 164 end +const SYS_mount 165 end +const SYS_umount2 166 end +const SYS_swapon 167 end +const SYS_swapoff 168 end +const SYS_reboot 169 end +const SYS_sethostname 170 end +const SYS_setdomainname 171 end +const SYS_iopl 172 end +const SYS_ioperm 173 end +const SYS_create_module 174 end +const SYS_init_module 175 end +const SYS_delete_module 176 end +const SYS_get_kernel_syms 177 end +const SYS_query_module 178 end +const SYS_quotactl 179 end +const SYS_nfsservctl 180 end +const SYS_getpmsg 181 end +const SYS_putpmsg 182 end +const SYS_afs_syscall 183 end +const SYS_tuxcall 184 end +const SYS_security 185 end +const SYS_gettid 186 end +const SYS_readahead 187 end +const SYS_setxattr 188 end +const SYS_lsetxattr 189 end +const SYS_fsetxattr 190 end +const SYS_getxattr 191 end +const SYS_lgetxattr 192 end +const SYS_fgetxattr 193 end +const SYS_listxattr 194 end +const SYS_llistxattr 195 end +const SYS_flistxattr 196 end +const SYS_removexattr 197 end +const SYS_lremovexattr 198 end +const SYS_fremovexattr 199 end +const SYS_tkill 200 end +const SYS_time 201 end +const SYS_futex 202 end +const SYS_sched_setaffinity 203 end +const SYS_sched_getaffinity 204 end +const SYS_set_thread_area 205 end +const SYS_io_setup 206 end +const SYS_io_destroy 207 end +const SYS_io_getevents 208 end +const SYS_io_submit 209 end +const SYS_io_cancel 210 end +const SYS_get_thread_area 211 end +const SYS_lookup_dcookie 212 end +const SYS_epoll_create 213 end +const SYS_epoll_ctl_old 214 end +const SYS_epoll_wait_old 215 end +const SYS_remap_file_pages 216 end +const SYS_getdents64 217 end +const SYS_set_tid_address 218 end +const SYS_restart_syscall 219 end +const SYS_semtimedop 
220 end +const SYS_fadvise64 221 end +const SYS_timer_create 222 end +const SYS_timer_settime 223 end +const SYS_timer_gettime 224 end +const SYS_timer_getoverrun 225 end +const SYS_timer_delete 226 end +const SYS_clock_settime 227 end +const SYS_clock_gettime 228 end +const SYS_clock_getres 229 end +const SYS_clock_nanosleep 230 end +const SYS_exit_group 231 end +const SYS_epoll_wait 232 end +const SYS_epoll_ctl 233 end +const SYS_tgkill 234 end +const SYS_utimes 235 end +const SYS_vserver 236 end +const SYS_mbind 237 end +const SYS_set_mempolicy 238 end +const SYS_get_mempolicy 239 end +const SYS_mq_open 240 end +const SYS_mq_unlink 241 end +const SYS_mq_timedsend 242 end +const SYS_mq_timedreceive 243 end +const SYS_mq_notify 244 end +const SYS_mq_getsetattr 245 end +const SYS_kexec_load 246 end +const SYS_waitid 247 end +const SYS_add_key 248 end +const SYS_request_key 249 end +const SYS_keyctl 250 end +const SYS_ioprio_set 251 end +const SYS_ioprio_get 252 end +const SYS_inotify_init 253 end +const SYS_inotify_add_watch 254 end +const SYS_inotify_rm_watch 255 end +const SYS_migrate_pages 256 end +const SYS_openat 257 end +const SYS_mkdirat 258 end +const SYS_mknodat 259 end +const SYS_fchownat 260 end +const SYS_futimesat 261 end +const SYS_newfstatat 262 end +const SYS_unlinkat 263 end +const SYS_renameat 264 end +const SYS_linkat 265 end +const SYS_symlinkat 266 end +const SYS_readlinkat 267 end +const SYS_fchmodat 268 end +const SYS_faccessat 269 end +const SYS_pselect6 270 end +const SYS_ppoll 271 end +const SYS_unshare 272 end +const SYS_set_robust_list 273 end +const SYS_get_robust_list 274 end +const SYS_splice 275 end +const SYS_tee 276 end +const SYS_sync_file_range 277 end +const SYS_vmsplice 278 end +const SYS_move_pages 279 end +const SYS_utimensat 280 end +const SYS_epoll_pwait 281 end +const SYS_signalfd 282 end +const SYS_timerfd_create 283 end +const SYS_eventfd 284 end +const SYS_fallocate 285 end +const SYS_timerfd_settime 286 end +const 
SYS_timerfd_gettime 287 end +const SYS_accept4 288 end +const SYS_signalfd4 289 end +const SYS_eventfd2 290 end +const SYS_epoll_create1 291 end +const SYS_dup3 292 end +const SYS_pipe2 293 end +const SYS_inotify_init1 294 end +const SYS_preadv 295 end +const SYS_pwritev 296 end +const SYS_rt_tgsigqueueinfo 297 end +const SYS_perf_event_open 298 end +const SYS_recvmmsg 299 end +const SYS_fanotify_init 300 end +const SYS_fanotify_mark 301 end +const SYS_prlimit64 302 end +const SYS_name_to_handle_at 303 end +const SYS_open_by_handle_at 304 end +const SYS_clock_adjtime 305 end +const SYS_syncfs 306 end +const SYS_sendmmsg 307 end +const SYS_setns 308 end +const SYS_getcpu 309 end +const SYS_process_vm_readv 310 end +const SYS_process_vm_writev 311 end +const SYS_kcmp 312 end +const SYS_finit_module 313 end \ No newline at end of file diff --git a/include/std.mcl b/include/std.mcl new file mode 100644 index 0000000..7f30f81 --- /dev/null +++ b/include/std.mcl @@ -0,0 +1,2 @@ +include "linux/linux.mcl" +include "types.mcl" \ No newline at end of file diff --git a/include/types.mcl b/include/types.mcl new file mode 100644 index 0000000..8989182 --- /dev/null +++ b/include/types.mcl @@ -0,0 +1,4 @@ +const sizeof(u8) 1 end +const sizeof(u16) 2 end +const sizeof(u32) 4 end +const sizeof(u64) 8 end \ No newline at end of file diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..41bc54d --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,125 @@ +use clap::{builder::PossibleValue, Parser, ValueEnum}; +use camino::Utf8PathBuf; +lazy_static::lazy_static! 
{ + static ref DEFAULT_INCLUDE_PATHS: Vec = vec![ + Utf8PathBuf::from("./"), + Utf8PathBuf::from("./include"), + Utf8PathBuf::from("~/.mclang/include"), + ]; +} + +#[derive(Debug, Parser)] +pub struct CliArgs { + /// Only compile, dont link + #[arg(long, short)] + pub compile: bool, + + /// Verosity + /// -1 - Nothing + /// 0 - Only errors + /// 1 - Normal + /// 2 - Verbose + /// 3 - Tracing + #[arg(long, short, default_value_t=1)] + pub verbose: i8, + + /// Runt the program after compilation + #[arg(long, short)] + pub run: bool, + + /// Output execuable file path + #[arg(long, short, default_value="./a.out")] + pub output: Utf8PathBuf, + + /// Paths to search for libraries + #[arg(long="include", short='I', default_values_t=DEFAULT_INCLUDE_PATHS.clone().into_iter())] + pub include_path: Vec, + + /// Target to compile to + #[arg(long, short='T', default_value_t=CompilationTarget::X86_64_linux_nasm)] + pub target: CompilationTarget, + + /// Input code files + pub input: Vec, + + #[clap(skip)] + pub passthrough: Vec +} + +impl CliArgs { + pub fn parse_with_passthrough() -> Self { + let mut clap_args = Vec::new(); + let mut pt_args = Vec::new(); + let mut switch = false; + for arg in std::env::args() { + if arg == String::from("--") { + switch = true; + continue; + } + + if !switch { + //clap args + clap_args.push(arg); + } else { + // passwthrough + pt_args.push(arg); + } + } + + let mut cargs = Self::parse_from(clap_args); + cargs.passthrough = pt_args; + + cargs + } +} + + + +#[allow(non_camel_case_types)] +#[derive(Debug, Clone)] +pub enum CompilationTarget { + X86_64_linux_nasm +} + +impl ValueEnum for CompilationTarget { + fn value_variants<'a>() -> &'a [Self] { + &[ + Self::X86_64_linux_nasm + ] + } + + fn to_possible_value(&self) -> Option { + match self { + CompilationTarget::X86_64_linux_nasm => Some(PossibleValue::new("x86_64-linux-nasm")), + } + } +} + +impl std::fmt::Display for CompilationTarget { + fn fmt(&self, f: &mut std::fmt::Formatter) -> 
std::fmt::Result { + let r = match self { + CompilationTarget::X86_64_linux_nasm => "x86_64-linux-nasm", + }; + write!(f, "{}", r) + } +} + +// impl From for clap::builder::OsStr { +// fn from(value: CompilationTarget) -> Self { +// match value { +// CompilationTarget::X86_64_linux_nasm => "X86_64_linux_nasm".into() +// } +// } +// } + +// impl TryFrom<&str> for CompilationTarget { +// type Error = anyhow::Error; +// fn try_from(value: &str) -> Result { +// match value { +// "X86_64_linux_nasm" => Ok(CompilationTarget::X86_64_linux_nasm) +// _ => bail!("Unknown compilation target {value}") +// } +// } + +// } + diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs new file mode 100644 index 0000000..c1ea035 --- /dev/null +++ b/src/compiler/mod.rs @@ -0,0 +1,84 @@ +mod x86_64_linux_nasm; +mod utils; + +use anyhow::bail; + +use crate::{cli::{CliArgs, CompilationTarget}, types::ast::Program}; +use std::{collections::HashMap, fs::File, io::{BufWriter, Write}, path::{Path, PathBuf}}; + +use self::utils::run_cmd; + + +pub trait Compiler { + fn new() -> Self; + fn generate_asm(&mut self, prog: &Program, fd: &mut BufWriter) -> anyhow::Result<()>; + fn compile(&mut self, asm_fp: &Path, obj: &Path) -> anyhow::Result<()>; + fn link(&mut self, obj_files: Vec, bin_fp: &Path) -> anyhow::Result<()>; + /// Return programs that are needed + fn needed_dependencies(&mut self) -> Vec<&str>; +} + +//NOTE: No bsd cause im not about to create 3 or 4 diffrent compilation targets + +pub fn compile_program(cli_args: &CliArgs, prog_map: HashMap<&Path, Program>) -> anyhow::Result<()> { + let mut compiler = match cli_args.target { + CompilationTarget::X86_64_linux_nasm => x86_64_linux_nasm::X86_64LinuxNasmCompiler::new(), + }; + let bin_p = cli_args.output.as_std_path(); + let mut objs = Vec::new(); + for (k, v) in prog_map { + let mut asm_p = k.to_path_buf(); + let mut obj_p = k.to_path_buf(); + + asm_p.set_extension("s"); + obj_p.set_extension("o"); + + + if let Err(_) = compile_file(&mut 
compiler, cli_args, asm_p.as_path(), obj_p.as_path(), &v) { + error!("Failed to compile file {k:?}"); + bail!("") + } + objs.push(obj_p.clone()); + } + + if let Err(e) = compiler.link(objs, bin_p) { + error!("Failed to link program: {e}"); + bail!("") + } + + info!("Finished building program"); + + if cli_args.run { + run_cmd(format!("./{}", bin_p.to_string_lossy()), cli_args.passthrough.clone())?; + } + + + Ok(()) +} + +pub fn compile_file(compiler: &mut C, _: &CliArgs, asm_file: &Path, obj_file: &Path, prog: &Program) -> anyhow::Result<()> { + + let asm_fd = std::fs::File::options() + .write(true) + .write(true) + .create(true) + .truncate(true) + .append(false) + .open(asm_file); + + let asm_fd = match asm_fd { + Ok(fd) => fd, + Err(e) => { + error!("Failed to open file {asm_file:?}: {e}"); + bail!(""); + } + }; + + let mut buf_asm_fd = BufWriter::new(asm_fd); + + compiler.generate_asm(prog, &mut buf_asm_fd)?; + buf_asm_fd.flush()?; + + compiler.compile(asm_file, obj_file)?; + Ok(()) +} \ No newline at end of file diff --git a/src/compiler/utils.rs b/src/compiler/utils.rs new file mode 100644 index 0000000..ede944c --- /dev/null +++ b/src/compiler/utils.rs @@ -0,0 +1,36 @@ +use std::{fmt::Debug, process::{Command, Stdio}}; + +use anyhow::bail; + + + +pub fn run_cmd<'a, S: Into + Debug + Clone>(bin: S, args: Vec) -> anyhow::Result<()> { + let debug = unsafe { + crate::logger::LOGGER.enabled(crate::logger::Level::Debug) + }; + let mut cmd = Command::new(bin.clone().into()); + let cmd = cmd.args(args); + let cmd = if debug { + cmd.stdout(Stdio::inherit()) + } else { + cmd.stdout(Stdio::null()) + }; + let cmd = cmd.stderr(Stdio::inherit()); + + let child = match cmd.spawn() { + Ok(c) => c, + Err(e) => { + error!("Unable to run {cmd:?}: {e}"); + bail!(""); + } + }; + let ret = child.wait_with_output().expect("fuck i know"); + + + if !ret.status.success() { + error!("Process running {bin:?} exited abnormaly, run with -v 2 for more output"); + bail!("") + } + + Ok(()) 
+} \ No newline at end of file diff --git a/src/compiler/x86_64_linux_nasm/mod.rs b/src/compiler/x86_64_linux_nasm/mod.rs new file mode 100644 index 0000000..1e8d757 --- /dev/null +++ b/src/compiler/x86_64_linux_nasm/mod.rs @@ -0,0 +1,485 @@ +mod utils; + +use std::path::PathBuf; +use std::{fs::File, io::BufWriter, path::Path}; +use std::io::Write; +use crate::types::ast::{AstNode, Function, Module, Program}; +use crate::types::token::{InstructionType, Token, TokenType}; + +use super::utils::run_cmd; +use super::Compiler; + + + + +pub struct X86_64LinuxNasmCompiler { + strings: Vec, + if_i: usize, + while_i: usize, + used_consts: Vec +} + +impl X86_64LinuxNasmCompiler { + fn handle_token(&mut self, fd: &mut BufWriter, _: &Program, token: &Token) -> anyhow::Result<()> { + match &token.typ { + TokenType::Instruction(it) => { + match it { + InstructionType::PushInt(i) => { + writeln!(fd, " mov rax, {i} ; PUSHINT({i})")?; + writeln!(fd, " push rax")?; + }, + InstructionType::PushStr(s) => { + writeln!(fd, " push {}", s.len())?; + writeln!(fd, " push str_{}; PUSHSTR({})", self.strings.len(), s.escape_debug())?; + self.strings.push(s.clone()); + }, + InstructionType::PushCStr(s) => { + writeln!(fd, " push str_{}; PUSHCSTR({})", self.strings.len(), s.escape_debug())?; + self.strings.push(s.clone()); + }, + InstructionType::PushChar(c) => { + writeln!(fd, " push {}; PUSHCHAR({})", *c as u8, c.escape_debug())?; + }, + InstructionType::Drop => { + writeln!(fd, " pop rax ; DROP")?; + }, + InstructionType::Print => { + writeln!(fd, " pop rdi")?; + writeln!(fd, " call _dbg_print ; _DBG_PRINT")?; + }, + InstructionType::Dup => { + writeln!(fd, " pop rax ; DUP")?; + writeln!(fd, " push rax")?; + writeln!(fd, " push rax")?; + }, + InstructionType::Rot => { + writeln!(fd, " pop rax ; ROT")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " pop rcx")?; + writeln!(fd, " push rbx")?; + writeln!(fd, " push rax")?; + writeln!(fd, " push rcx")?; + }, + InstructionType::Over => { + 
writeln!(fd, " pop rax ; OVER")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " push rbx")?; + writeln!(fd, " push rax")?; + writeln!(fd, " push rbx")?; + }, + InstructionType::Swap => { + writeln!(fd, " pop rax ; SWAP")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " push rax")?; + writeln!(fd, " push rbx")?; + } + InstructionType::Minus => { + writeln!(fd, " pop rax ; SUB")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " sub rbx, rax")?; + writeln!(fd, " push rbx")?; + }, + InstructionType::Plus => { + writeln!(fd, " pop rax ; ADD")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " add rax, rbx")?; + writeln!(fd, " push rax")?; + }, + InstructionType::Equals => { + writeln!(fd, " mov rcx, 0 ; EQ")?; + writeln!(fd, " mov rdx, 1")?; + writeln!(fd, " pop rax")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " cmp rax, rbx")?; + writeln!(fd, " cmove rcx, rdx")?; + writeln!(fd, " push rcx")?; + }, + InstructionType::Gt => { + writeln!(fd, " mov rcx, 0 ; GT")?; + writeln!(fd, " mov rdx, 1")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " pop rax")?; + writeln!(fd, " cmp rax, rbx")?; + writeln!(fd, " cmovg rcx, rdx")?; + writeln!(fd, " push rcx")?; + }, + InstructionType::Lt => { + writeln!(fd, " mov rcx, 0 ; LT")?; + writeln!(fd, " mov rdx, 1")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " pop rax")?; + writeln!(fd, " cmp rax, rbx")?; + writeln!(fd, " cmovl rcx, rdx")?; + writeln!(fd, " push rcx")?; + }, + InstructionType::Ge => { + writeln!(fd, " mov rcx, 0 ; GE")?; + writeln!(fd, " mov rdx, 1")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " pop rax")?; + writeln!(fd, " cmp rax, rbx")?; + writeln!(fd, " cmovge rcx, rdx")?; + writeln!(fd, " push rcx")?; + }, + InstructionType::Le => { + writeln!(fd, " mov rcx, 0 ; LE")?; + writeln!(fd, " mov rdx, 1")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " pop rax")?; + writeln!(fd, " cmp rax, rbx")?; + writeln!(fd, " cmovle rcx, rdx")?; + writeln!(fd, " push rcx")?; + }, + InstructionType::NotEquals => { + writeln!(fd, " mov 
rdx, 1 ; NEQ")?; + writeln!(fd, " mov rcx, 0")?; + writeln!(fd, " pop rax")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " cmp rax, rbx")?; + writeln!(fd, " cmovne rcx, rdx")?; // NEQ yields 1 only when the operands differ + writeln!(fd, " push rcx")?; + }, + InstructionType::Band => { + writeln!(fd, " pop rax ; BAND")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " and rbx, rax")?; + writeln!(fd, " push rbx")?; + }, + InstructionType::Bor => { + writeln!(fd, " pop rax ; BOR")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " or rbx, rax")?; + writeln!(fd, " push rbx")?; + } + InstructionType::Shr => { + writeln!(fd, " pop rcx")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " shr rbx, cl")?; + writeln!(fd, " push rbx")?; + }, + InstructionType::Shl => { + writeln!(fd, " pop rcx")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " shl rbx, cl")?; + writeln!(fd, " push rbx")?; + }, + InstructionType::DivMod => { + writeln!(fd, " xor rdx, rdx")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " pop rax")?; + writeln!(fd, " div rbx")?; + writeln!(fd, " push rax")?; + writeln!(fd, " push rdx")?; + }, + InstructionType::Mul => { + writeln!(fd, " pop rax ; MUL")?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " mul rbx")?; + writeln!(fd, " push rax")?; + }, + InstructionType::Read8 => { + writeln!(fd, " pop rax ; READ8")?; + writeln!(fd, " xor rbx, rbx")?; + writeln!(fd, " mov bl, byte [rax]")?; + writeln!(fd, " push rbx")?; + } + InstructionType::Write8 => { + // Same operand order as Write32/Write64: value on top, address below; stores one byte and pushes nothing. + writeln!(fd, " pop rbx ; WRITE 8")?; + writeln!(fd, " pop rax")?; + writeln!(fd, " mov byte [rax], bl")?; + }, + InstructionType::Read32 => { + writeln!(fd, " pop rax ; READ 32")?; + writeln!(fd, " xor rbx, rbx")?; + writeln!(fd, " mov ebx, dword [rax]")?; + writeln!(fd, " push rbx")?; + }, + InstructionType::Write32 => { + writeln!(fd, " pop rbx ; WRITE 32")?; + writeln!(fd, " pop rax")?; + writeln!(fd, " mov dword[rax], ebx")?; + }, + InstructionType::Read64 => { + writeln!(fd, " pop rax ; READ 64")?; + writeln!(fd, " xor rbx, 
rbx")?; + writeln!(fd, " mov rbx, qword [rax]")?; + writeln!(fd, " push rbx")?; + }, + InstructionType::Write64 => { + writeln!(fd, " pop rbx ; WRITE 64")?; + writeln!(fd, " pop rax")?; + writeln!(fd, " mov qword[rax], rbx")?; + }, + InstructionType::Syscall0 => { + writeln!(fd, " pop rax")?; + writeln!(fd, " syscall")?; + writeln!(fd, " push rax")?; + }, + InstructionType::Syscall1 => { + writeln!(fd, " pop rax")?; + writeln!(fd, " pop rdi")?; + writeln!(fd, " syscall")?; + writeln!(fd, " push rax")?; + }, + InstructionType::Syscall2 => { + writeln!(fd, " pop rax")?; + writeln!(fd, " pop rdi")?; + writeln!(fd, " pop rsi")?; + writeln!(fd, " syscall")?; + writeln!(fd, " push rax")?; + }, + InstructionType::Syscall3 => { + writeln!(fd, " pop rax")?; + writeln!(fd, " pop rdi")?; + writeln!(fd, " pop rsi")?; + writeln!(fd, " pop rdx")?; + writeln!(fd, " syscall")?; + writeln!(fd, " push rax")?; + }, + InstructionType::Syscall4 => { + writeln!(fd, " pop rax")?; + writeln!(fd, " pop rdi")?; + writeln!(fd, " pop rsi")?; + writeln!(fd, " pop rdx")?; + writeln!(fd, " pop r10")?; + writeln!(fd, " syscall")?; + writeln!(fd, " push rax")?; + }, + InstructionType::Syscall5 => { + writeln!(fd, " pop rax")?; + writeln!(fd, " pop rdi")?; + writeln!(fd, " pop rsi")?; + writeln!(fd, " pop rdx")?; + writeln!(fd, " pop r10")?; + writeln!(fd, " pop r8")?; + writeln!(fd, " syscall")?; + writeln!(fd, " push rax")?; + }, + InstructionType::Syscall6 => { + writeln!(fd, " pop rax")?; + writeln!(fd, " pop rdi")?; + writeln!(fd, " pop rsi")?; + writeln!(fd, " pop rdx")?; + writeln!(fd, " pop r10")?; + writeln!(fd, " pop r8")?; + writeln!(fd, " pop r9")?; + writeln!(fd, " syscall")?; + writeln!(fd, " push rax")?; + }, + InstructionType::CastBool | + InstructionType::CastPtr | + InstructionType::CastInt | + InstructionType::CastVoid => (), //? 
Possibly have a use for this + InstructionType::TypeBool | + InstructionType::TypePtr | + InstructionType::TypeInt | + InstructionType::TypeVoid | + InstructionType::TypeAny | + InstructionType::FnCall | + InstructionType::MemUse | + InstructionType::ConstUse => unreachable!(), + InstructionType::Return => { + writeln!(fd, " sub rbp, 8")?; + writeln!(fd, " mov rbx, qword [rbp]")?; + writeln!(fd, " push rbx")?; + writeln!(fd, " ret")?; + }, + } + }, + TokenType::Keyword(_) | + TokenType::Unknown(_) => unreachable!(), + } + Ok(()) + } + + fn handle_module(&mut self, fd: &mut BufWriter, prog: &Program, module: &Module) -> anyhow::Result<()> { + writeln!(fd, "; {} Module {} START", module.path.join("::"), module.ident)?; + self.handle_ast_list(fd, prog, module.body.clone())?; + writeln!(fd, "; {} Module {} END", module.path.join("::"), module.ident)?; + Ok(()) + } + + fn handle_function(&mut self, fd: &mut BufWriter, prog: &Program, func: &Function) -> anyhow::Result<()> { + writeln!(fd, "{f}: ; fn {f}", f=func.ident)?; + writeln!(fd, " pop rbx")?; + writeln!(fd, " mov qword [rbp], rbx")?; + writeln!(fd, " add rbp, 8")?; + + self.handle_ast_list(fd, prog, func.body.clone())?; + + writeln!(fd, " sub rbp, 8")?; + writeln!(fd, " mov rbx, qword [rbp]")?; + writeln!(fd, " push rbx")?; + writeln!(fd, " ret")?; + Ok(()) + } + + fn handle_ast_list(&mut self, fd: &mut BufWriter, prog: &Program, ast: Vec) -> anyhow::Result<()> { + for node in ast { + match &node { + AstNode::Function(f) => self.handle_function(fd, prog, f)?, + AstNode::Constant(_) => (), + AstNode::If(i) => { + let id = self.if_i; + self.if_i += 1; + + writeln!(fd, "; IF({id}) START")?; + self.handle_ast_list(fd, prog, i.test.clone())?; + writeln!(fd, " pop rax")?; + writeln!(fd, " test rax, rax")?; + writeln!(fd, " jz if_{id}_else")?; + writeln!(fd, "if_{id}_start:")?; + self.handle_ast_list(fd, prog, i.body.clone())?; + writeln!(fd, " jmp if_{id}_end")?; + writeln!(fd, "if_{id}_else:")?; + 
self.handle_ast_list(fd, prog, vec![(*i.els).clone()])?; // clone the else-node out of its Box; nothing is leaked + writeln!(fd, "if_{id}_end:")?; + writeln!(fd, "; IF({id}) END")?; + }, + AstNode::While(w) => { + let id = self.while_i; + self.while_i += 1; + writeln!(fd, "; WHILE({id}) START")?; + writeln!(fd, "while_{id}_test:")?; + self.handle_ast_list(fd, prog, w.test.clone())?; + writeln!(fd, " pop rax")?; + writeln!(fd, " test rax, rax")?; + writeln!(fd, " jz while_{id}_exit")?; + writeln!(fd, "while_{id}_start:")?; + self.handle_ast_list(fd, prog, w.body.clone())?; + writeln!(fd, "while_{id}_end:")?; + writeln!(fd, " jmp while_{id}_test")?; + writeln!(fd, "while_{id}_exit:")?; + writeln!(fd, "; WHILE({id}) END")?; + }, + AstNode::Module(m) => self.handle_module(fd, prog, m)?, + AstNode::Memory(_) => todo!(), + AstNode::MemUse(_) => todo!(), + AstNode::ConstUse(c) => { + self.used_consts.push(c.ident.clone()); + writeln!(fd, " mov rax, qword [c_{}]", c.ident)?; + writeln!(fd, " push rax")?; + }, + AstNode::FnCall(f)=> { + writeln!(fd, " call {f} ; FUNCTIONCALL({f:?})", f=f.ident)?; + }, + AstNode::Block(b)=> { + writeln!(fd, "; BLOCK({}) START", b.comment)?; + self.handle_ast_list(fd, prog, b.body.clone())?; + writeln!(fd, "; BLOCK({}) END", b.comment)?; + }, + AstNode::Token(t) => self.handle_token(fd, prog, t)?, + AstNode::Int(_, _) | + AstNode::Str(_, _) | + AstNode::CStr(_, _) | + AstNode::Char(_, _) => unreachable!(), + } + } + Ok(()) + } +} + + +impl Compiler for X86_64LinuxNasmCompiler { + fn new() -> Self { + Self { + strings: Vec::new(), + used_consts: Vec::new(), + if_i: 0, + while_i: 0, + } + } + + fn generate_asm(&mut self, prog: &Program, fd: &mut BufWriter<File>) -> anyhow::Result<()> { + + writeln!(fd, "BITS 64")?; + writeln!(fd, "segment .text")?; + writeln!(fd, "{}", utils::DBG_PRINT)?; + writeln!(fd, "global _start")?; + writeln!(fd, "_start:")?; + writeln!(fd, " lea rbp, [rel ret_stack]")?; + writeln!(fd, " call main")?; + writeln!(fd, " jmp __MCL_END__")?; + + match &prog.ast { + 
AstNode::Module(m) => { + self.handle_module(fd, prog, m)?; + }, + _ => panic!() + } + + + writeln!(fd, "__MCL_END__:")?; + writeln!(fd, " mov rax, 60")?; + writeln!(fd, " mov rdi, 0")?; + writeln!(fd, " syscall")?; + + writeln!(fd, "segment .data")?; + for (_, v) in prog.constants.iter() { + + if !self.used_consts.contains(&v.ident) { + continue; + } + + match &*v.value { // borrow the boxed constant in place; no clone, no leak + AstNode::Int(_, val) => { + writeln!(fd, "c_{}: dq {}", v.ident, val)?; + } + AstNode::Str(_, val) | + AstNode::CStr(_, val) => { + let s_chars = val.chars().map(|c| (c as u32).to_string()).collect::<Vec<String>>(); + let s_list = s_chars.join(","); + writeln!(fd, "c_{}: db {} ; {}", v.ident, s_list, val.escape_debug())?; + } + AstNode::Char(_, val) => { + writeln!(fd, "c_{}: db {} ; '{}'", v.ident, *val as u8, val)?; + } + c => panic!("{c:?}") + }; + } + + for (i, s) in self.strings.iter().enumerate() { + let s_chars = s.chars().map(|c| (c as u32).to_string()).collect::<Vec<String>>(); + let s_list = s_chars.join(","); + writeln!(fd, "str_{i}: db {} ; STRDEF({})", s_list, s.escape_debug())?; + } + writeln!(fd, "segment .bss")?; + writeln!(fd, "ret_stack: resq 256")?; + + //TODO: Memories + + + Ok(()) + } + + + fn compile(&mut self, asm_fp: &Path, obj_fp: &Path) -> anyhow::Result<()> { + run_cmd("nasm", vec![ + String::from("-felf64"), + String::from("-o"), + obj_fp.to_string_lossy().to_string(), + asm_fp.to_string_lossy().to_string() + ]) + } + + fn link(&mut self, obj_files: Vec<PathBuf>, bin_fp: &Path) -> anyhow::Result<()> { + let mut args = vec![ + String::from("-o"), + bin_fp.to_string_lossy().to_string(), + ]; + + for f in obj_files { + args.push(f.to_string_lossy().to_string()) + } + + run_cmd("ld", args) + } + + fn needed_dependencies(&mut self) -> Vec<&str> { + vec![ + "nasm", + "ld" + ] + } +} \ No newline at end of file diff --git a/src/compiler/x86_64_linux_nasm/utils.rs b/src/compiler/x86_64_linux_nasm/utils.rs new file mode 100644 index 0000000..82a0895 --- /dev/null +++ 
b/src/compiler/x86_64_linux_nasm/utils.rs @@ -0,0 +1,37 @@ + + +pub const DBG_PRINT: &'static str = " +_dbg_print: + mov r9, -3689348814741910323 + sub rsp, 40 + mov BYTE [rsp+31], 10 + lea rcx, [rsp+30] +.L2: + mov rax, rdi + lea r8, [rsp+32] + mul r9 + mov rax, rdi + sub r8, rcx + shr rdx, 3 + lea rsi, [rdx+rdx*4] + add rsi, rsi + sub rax, rsi + add eax, 48 + mov BYTE [rcx], al + mov rax, rdi + mov rdi, rdx + mov rdx, rcx + sub rcx, 1 + cmp rax, 9 + ja .L2 + lea rax, [rsp+32] + mov edi, 1 + sub rdx, rax + xor eax, eax + lea rsi, [rsp+32+rdx] + mov rdx, r8 + mov rax, 1 + syscall + add rsp, 40 + ret +"; \ No newline at end of file diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs new file mode 100644 index 0000000..cc41cac --- /dev/null +++ b/src/lexer/mod.rs @@ -0,0 +1,302 @@ +use std::path::Path; +use anyhow::bail; + +use crate::{error, types::{common::Loc, token::{InstructionType, KeywordType, Token, TokenType}}}; + + + +pub struct Lexer { + pub loc: Loc, + pub tokens: Vec +} + +impl Lexer { + pub fn new() -> Self { + Self { + loc: Default::default(), + tokens: Default::default(), + } + } + + + pub fn lex(&mut self, file: &Path) -> anyhow::Result<&mut Self> { + self.reset(file); + + let chars = match std::fs::read_to_string(file) { + Ok(c) => c, + Err(e) => { + error!("Failed to open file {file:?} : {e}"); + bail!(""); + } + }.chars().collect::>(); + + + let mut idx = 0; + let mut buf = String::new(); + let mut is_searching = false; + + if let Err(_) = self.go_to_first_char(&chars, &mut idx) { + return Ok(self); + } + + let mut start_loc = self.loc.clone(); + while idx < chars.len() { + + match chars[idx] { + + 'c' if chars.get(idx + 1) == Some(&'"') => { + start_loc = self.loc.clone(); + is_searching = true; + idx += 2; // skip c and " + self.loc.col += 2; + + if !buf.is_empty() { + debug!({loc => self.loc() }, "buffer was not empty, intresting"); + } + + loop { + if chars[idx] == '"' && chars[idx-1] != '\\' { + break; + } + buf.push(chars[idx]); + if chars[idx] 
== '\n' { + self.loc.inc_line() + } + self.loc.inc_col(); + idx += 1; + } + + buf.push('\0'); + let str = self.unescape(&buf); + self.loc.inc_col(); + self.tokens.push(Token::new(TokenType::Instruction(InstructionType::PushCStr(str)), start_loc.clone(), buf.clone())); // located at the literal's start, like PushStr + buf.clear(); + } + + '"' => { + start_loc = self.loc.clone(); + is_searching = true; + idx += 1; // skip " + self.loc.col += 1; + + if !buf.is_empty() { + debug!({loc => self.loc() }, "buffer was not empty, intresting ({buf:?})"); + } + + // while chars.get(idx+1) != Some(&'"') && chars[idx] != '\\' && chars.get(idx+1).is_some() { + loop { + if chars[idx] == '"' && chars[idx-1] != '\\' { + break; + } + buf.push(chars[idx]); + if chars[idx] == '\n' { + self.loc.inc_line() + } + self.loc.inc_col(); + idx += 1; + } + + + let str = self.unescape(&buf); + self.loc.inc_col(); + self.tokens.push(Token::new(TokenType::Instruction(InstructionType::PushStr(str)), start_loc.clone(), buf.clone())); + buf.clear(); + } + + '\'' => { + start_loc = self.loc.clone(); + is_searching = true; + idx += 1; // skip ' + self.loc.col += 1; + + if !buf.is_empty() { + debug!({loc => self.loc() }, "buffer was not empty, intresting ({buf})"); + } + + loop { + if chars[idx] == '\'' && chars[idx-1] != '\\' { // a char literal ends at an unescaped single quote + break; + } + buf.push(chars[idx]); + if chars[idx] == '\n' { + self.loc.inc_line() + } + self.loc.inc_col(); + idx += 1; + } + + let str = self.unescape(&buf); + if str.len() != 1 { // exactly one byte: rejects empty and multi-byte literals + error!({loc => self.loc()}, "Chars can only have 1 char"); + bail!("") + } + + self.loc.inc_col(); + self.tokens.push(Token::new(TokenType::Instruction(InstructionType::PushChar(str.chars().next().expect("length checked above"))), start_loc.clone(), buf.clone())); + buf.clear(); + } + + ch @ (' ' | '\n' | '\r') => { + if ch == '\n' { + self.loc.inc_line(); + } else { + self.loc.inc_col(); + } + if !buf.is_empty() { + //TODO: Implement signed ints + if let Ok(int) = parse_int::parse::<usize>(&buf) { + self.tokens.push(Token::new(TokenType::Instruction(InstructionType::PushInt(int)), start_loc.clone(), 
buf.clone())); + } else { + let token_type = self.match_token_type(&buf); + self.tokens.push(Token::new(token_type, start_loc.clone(), buf.clone())); + } + + buf.clear(); + is_searching = true; + } + } + + '/' if chars.get(idx + 1) == Some(&'/') => { + while idx < chars.len() && chars[idx] != '\n' { // also stop at end of input, so a final comment without a newline terminates + self.loc.inc_col(); + idx += 1; + } + self.loc.inc_line(); + } + + + ch => { + if is_searching { + is_searching = false; + start_loc = self.loc.clone(); + } + + buf.push(ch); + self.loc.inc_col(); + } + + } + idx += 1; + } + //? Add last token + //TODO: Implement signed ints + if !buf.is_empty() { + if let Ok(int) = parse_int::parse::<usize>(&buf) { + self.tokens.push(Token::new(TokenType::Instruction(InstructionType::PushInt(int)), start_loc.clone(), buf.clone())); + } else { + let token_type = self.match_token_type(&buf); + self.tokens.push(Token::new(token_type, start_loc.clone(), buf.clone())); + } + } + + // for t in &self.tokens { + // debug!({loc => t.loc.clone()}, "token: {:?}", t.typ); + // } + + Ok(self) + } + + fn go_to_first_char(&mut self, chars: &Vec<char>, idx: &mut usize) -> anyhow::Result<()> { + loop { + if let Some(c) = chars.get(*idx) { + match c { + ' ' | '\r' => self.loc.inc_col(), + '\n' => self.loc.inc_line(), + _ => break, + } + *idx += 1; + } else { + warn!("Empty program"); + bail!("") + } + } + + + Ok(()) + } + + fn match_token_type(&self, s: &str) -> TokenType { + match s { + "if" => TokenType::Keyword(KeywordType::If), + "else" => TokenType::Keyword(KeywordType::Else), + "end" => TokenType::Keyword(KeywordType::End), + "while" => TokenType::Keyword(KeywordType::While), + "do" => TokenType::Keyword(KeywordType::Do), + "include" => TokenType::Keyword(KeywordType::Include), + "memory" => TokenType::Keyword(KeywordType::Memory), + "const" => TokenType::Keyword(KeywordType::Constant), + "fn" => TokenType::Keyword(KeywordType::Function), + "then" => TokenType::Keyword(KeywordType::Then), + "done" => TokenType::Keyword(KeywordType::Done), + "struct" => 
TokenType::Keyword(KeywordType::Struct), + "inline" => TokenType::Keyword(KeywordType::Inline), + "export" => TokenType::Keyword(KeywordType::Export), + "extern" => TokenType::Keyword(KeywordType::Extern), + "returns" => TokenType::Keyword(KeywordType::Returns), + "with" => TokenType::Keyword(KeywordType::With), + "drop" => TokenType::Instruction(InstructionType::Drop), + "_dbg_print"=> TokenType::Instruction(InstructionType::Print), + "dup" => TokenType::Instruction(InstructionType::Dup), + "rot" => TokenType::Instruction(InstructionType::Rot), + "over" => TokenType::Instruction(InstructionType::Over), + "swap" => TokenType::Instruction(InstructionType::Swap), + "sub" => TokenType::Instruction(InstructionType::Minus), + "add" => TokenType::Instruction(InstructionType::Plus), + "eq" => TokenType::Instruction(InstructionType::Equals), + "gt" => TokenType::Instruction(InstructionType::Gt), + "lt" => TokenType::Instruction(InstructionType::Lt), + "ge" => TokenType::Instruction(InstructionType::Ge), + "le" => TokenType::Instruction(InstructionType::Le), + "neq" => TokenType::Instruction(InstructionType::NotEquals), + "band" => TokenType::Instruction(InstructionType::Band), + "bor" => TokenType::Instruction(InstructionType::Bor), + "shr" => TokenType::Instruction(InstructionType::Shr), + "shl" => TokenType::Instruction(InstructionType::Shl), + "divmod" => TokenType::Instruction(InstructionType::DivMod), + "mul" => TokenType::Instruction(InstructionType::Mul), + "read8" => TokenType::Instruction(InstructionType::Read8), + "write8" => TokenType::Instruction(InstructionType::Write8), + "read32" => TokenType::Instruction(InstructionType::Read32), + "write32" => TokenType::Instruction(InstructionType::Write32), + "read64" => TokenType::Instruction(InstructionType::Read64), + "write64" => TokenType::Instruction(InstructionType::Write64), + "syscall0" => TokenType::Instruction(InstructionType::Syscall0), + "syscall1" => TokenType::Instruction(InstructionType::Syscall1), + 
"syscall2" => TokenType::Instruction(InstructionType::Syscall2), + "syscall3" => TokenType::Instruction(InstructionType::Syscall3), + "syscall4" => TokenType::Instruction(InstructionType::Syscall4), + "syscall5" => TokenType::Instruction(InstructionType::Syscall5), + "syscall6" => TokenType::Instruction(InstructionType::Syscall6), + "(bool)" => TokenType::Instruction(InstructionType::CastBool), + "(ptr)" => TokenType::Instruction(InstructionType::CastPtr), + "(int)" => TokenType::Instruction(InstructionType::CastInt), + "(void)" => TokenType::Instruction(InstructionType::CastVoid), + "bool" => TokenType::Instruction(InstructionType::TypeBool), + "ptr" => TokenType::Instruction(InstructionType::TypePtr), + "int" => TokenType::Instruction(InstructionType::TypeInt), + "void" => TokenType::Instruction(InstructionType::TypeVoid), + "any" => TokenType::Instruction(InstructionType::TypeAny), + "return" => TokenType::Instruction(InstructionType::Return), + t => TokenType::Unknown(t.to_string()) + } + } + + pub fn reset(&mut self, file: &Path) -> &mut Self { + self.loc.file = file.to_string_lossy().to_string(); + self.loc.line = 1; + self.loc.col = 0; + self.tokens = Vec::new(); + self + } + + fn loc(&self) -> Loc { + self.loc.clone() + } + fn unescape(&self, s: &String) -> String { + //TODO: add more escapes + s + .replace("\\n", "\n") + .replace("\\0", "\0") + } + +} \ No newline at end of file diff --git a/src/logger/colors.rs b/src/logger/colors.rs new file mode 100644 index 0000000..5950972 --- /dev/null +++ b/src/logger/colors.rs @@ -0,0 +1,32 @@ +#![allow(dead_code)] +pub const RESET: &str = "\x1b[0m"; +pub const BOLD: &str = "\x1b[1m"; +pub const ITALIC: &str = "\x1b[3m"; +pub const UNDERLINE: &str = "\x1b[4m"; +pub const BLINK: &str = "\x1b[5m"; +pub const BLINK2: &str = "\x1b[6m"; +pub const SELECTED: &str = "\x1b[7m"; +pub const BLACK: &str = "\x1b[30m"; +pub const RED: &str = "\x1b[31m"; +pub const GREEN: &str = "\x1b[32m"; +pub const YELLOW: &str = "\x1b[33m"; 
+pub const BLUE: &str = "\x1b[34m"; +pub const MAGENTA: &str = "\x1b[35m"; +pub const BEIGE: &str = "\x1b[36m"; +pub const WHITE: &str = "\x1b[37m"; +pub const BLACKBG: &str = "\x1b[40m"; +pub const REDBG: &str = "\x1b[41m"; +pub const GREENBG: &str = "\x1b[42m"; +pub const YELLOWBG: &str = "\x1b[43m"; +pub const BLUEBG: &str = "\x1b[44m"; +pub const MAGENTABG: &str = "\x1b[45m"; +pub const BEIGEBG: &str = "\x1b[46m"; +pub const WHITEBG: &str = "\x1b[47m"; +pub const GREY: &str = "\x1b[90m"; +pub const RED2: &str = "\x1b[91m"; +pub const GREEN2: &str = "\x1b[92m"; +pub const YELLOW2: &str = "\x1b[93m"; +pub const BLUE2: &str = "\x1b[94m"; +pub const MAGENTA2: &str = "\x1b[95m"; +pub const BEIGE2: &str = "\x1b[96m"; +pub const WHITE2: &str = "\x1b[97m"; diff --git a/src/logger/macros.rs b/src/logger/macros.rs new file mode 100644 index 0000000..5b1056a --- /dev/null +++ b/src/logger/macros.rs @@ -0,0 +1,106 @@ + + +#[macro_export] +macro_rules! log { + ({$($k: expr => $v: expr),* $(,)? }, $lvl:expr, $($arg:tt)+) => { + crate::log_tagged!({$($k => $v,)*}, module_path!(), $lvl, $($arg)+) // log_tagged! takes (tags, module, level, format args...) + }; + (module: $module:expr, $lvl:expr, $($arg:tt)+) => { + unsafe { + crate::logger::LOGGER.log( + crate::logger::LogEvent { + level: $lvl, + module_path: $module.to_string(), + message: format!($($arg)+), + tags: std::collections::HashMap::new() + } + ) + } + }; + + + ($lvl:expr, $($arg:tt)+) => { + crate::log!(module: module_path!(), $lvl, $($arg)+) + }; +} + +#[macro_export] +macro_rules! log_tagged { + ({$($k: expr => $v: expr),* $(,)? }, $module:expr, $lvl:expr, $($arg:tt)+) => { + unsafe { + crate::logger::LOGGER.log( + crate::logger::LogEvent { + level: $lvl, + module_path: $module.to_string(), + message: format!($($arg)+), + tags: map_macro::hash_map!{$(stringify!($k).to_string() => Box::new($v) as Box<dyn std::any::Any>,)*} + } + ) + } + }; +} + + +#[macro_export] +macro_rules! 
debug { + (module: $module:expr, $($arg:tt)+) => { + crate::log!(module: $module, crate::logger::Level::Debug, $($arg)+) + }; + ({$($k: expr => $v: expr),* $(,)? }, module: $module:expr, $($arg:tt)+) => { // must precede the tagged arm without a module, which accepts any tokens + crate::log_tagged!({$($k => $v,)*}, $module, crate::logger::Level::Debug, $($arg)+) + }; + ({$($k: expr => $v: expr),* $(,)? }, $($arg:tt)+) => { + crate::log_tagged!({$($k => $v,)*}, module_path!(), crate::logger::Level::Debug, $($arg)+) + }; + ($($arg:tt)+) => { + crate::log!(crate::logger::Level::Debug, $($arg)+) + }; +} + +#[macro_export] +macro_rules! info { + (module: $module:expr, $($arg:tt)+) => { + crate::log!(module: $module, crate::logger::Level::Info, $($arg)+) + }; + ({$($k: expr => $v: expr),* $(,)? }, module: $module:expr, $($arg:tt)+) => { // must precede the tagged arm without a module, which accepts any tokens + crate::log_tagged!({$($k => $v,)*}, $module, crate::logger::Level::Info, $($arg)+) + }; + ({$($k: expr => $v: expr),* $(,)? }, $($arg:tt)+) => { + crate::log_tagged!({$($k => $v,)*}, module_path!(), crate::logger::Level::Info, $($arg)+) + }; + ($($arg:tt)+) => { + crate::log!(crate::logger::Level::Info, $($arg)+) + }; +} + +#[macro_export] +macro_rules! warn { + (module: $module:expr, $($arg:tt)+) => { + crate::log!(module: $module, crate::logger::Level::Warn, $($arg)+) + }; + ({$($k: expr => $v: expr),* $(,)? }, module: $module:expr, $($arg:tt)+) => { // must precede the tagged arm without a module, which accepts any tokens + crate::log_tagged!({$($k => $v,)*}, $module, crate::logger::Level::Warn, $($arg)+) + }; + ({$($k: expr => $v: expr),* $(,)? }, $($arg:tt)+) => { + crate::log_tagged!({$($k => $v,)*}, module_path!(), crate::logger::Level::Warn, $($arg)+) + }; + ($($arg:tt)+) => { + crate::log!(crate::logger::Level::Warn, $($arg)+) + }; +} + +#[macro_export] +macro_rules! error { + (module: $module:expr, $($arg:tt)+) => { + crate::log!(module: $module, crate::logger::Level::Error, $($arg)+) + }; + ({$($k: expr => $v: expr),* $(,)? 
}, module: $module:expr, $($arg:tt)+) => { // must precede the tagged arm without a module, which accepts any tokens + crate::log_tagged!({$($k => $v,)*}, $module, crate::logger::Level::Error, $($arg)+) + }; + ({$($k: expr => $v: expr),* $(,)? }, $($arg:tt)+) => { + crate::log_tagged!({$($k => $v,)*}, module_path!(), crate::logger::Level::Error, $($arg)+) + }; + ($($arg:tt)+) => { + crate::log!(crate::logger::Level::Error, $($arg)+) + }; +} diff --git a/src/logger/mod.rs b/src/logger/mod.rs new file mode 100644 index 0000000..4e96324 --- /dev/null +++ b/src/logger/mod.rs @@ -0,0 +1,83 @@ + +// use log::{Level, LevelFilter, Metadata, Record, SetLoggerError}; + +use std::ops::Deref; + +use crate::{cli::CliArgs, types::common::Loc}; + +mod types; +mod colors; +#[macro_use] +pub mod macros; +pub use types::{Level, LogEvent, LOGGER}; +use types::*; + + +pub struct Logger{ + pub level: i8 +} + + +impl Logger { + pub fn new(args: &CliArgs) -> Box<Self> { + Box::new(Self { + level: args.verbose + }) + } + + pub fn init(args: &CliArgs) -> anyhow::Result<()>{ + unsafe { + types::LOGGER = Box::leak( + Self::new(args) + ); + } + Ok(()) + } + + fn get_prefix(&self, level: Level) -> String { + use colors::{BOLD, RESET, RED, YELLOW, BLUE, GREEN, MAGENTA}; + match level { + Level::Error => format!("{BOLD}{RED}error{RESET}", ), + Level::Warn => format!("{BOLD}{YELLOW}warn{RESET}", ), + Level::Info => format!("{BOLD}{GREEN}info{RESET}", ), + Level::Debug => format!("{BOLD}{BLUE}debug{RESET}", ), + Level::Trace => format!("{BOLD}{MAGENTA}trace{RESET}", ), + } + } +} + +impl Log for Logger { + fn enabled(&self, level: Level) -> bool { + match level { + Level::Error if self.level >= 0 => true, + Level::Warn | + Level::Info if self.level >= 1 => true, + Level::Debug if self.level >= 2 => true, + Level::Trace if self.level >= 3 => true, + _ => false + } + } + + fn log(&self, event: LogEvent) { + + if self.enabled(event.level) { + let modpath = if event.level > Level::Info { + format!(" [{}]", event.module_path) + } else { + String::new() + }; + + if let Some(loc) = 
event.tags.get("loc") { + let loc: String = (*loc.deref()).downcast_ref::() + .map_or(String::from("INVALID"), |l| l.to_string()); + println!("{} {}{modpath}: {}", loc, self.get_prefix(event.level), event.message); + } else { + println!("{}{modpath}: {}", self.get_prefix(event.level), event.message); + } + } + } + + fn level(&self) -> i8 { + self.level + } +} \ No newline at end of file diff --git a/src/logger/types.rs b/src/logger/types.rs new file mode 100644 index 0000000..36c59c2 --- /dev/null +++ b/src/logger/types.rs @@ -0,0 +1,40 @@ +use std::{any::Any, collections::HashMap, fmt::Debug }; + + +pub static mut LOGGER: &dyn Log = &NopLogger; + +struct NopLogger; + +#[allow(dead_code)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] +pub enum Level { + Error = 1, + Warn, + Info, + Debug, + Trace, +} + +// pub trait Tag: Display + Debug + Any {} + + +pub struct LogEvent { + pub level: Level, + pub module_path: String, + pub message: String, + pub tags: HashMap> +} + + +impl Log for NopLogger { + fn enabled(&self, _: Level) -> bool {false} + fn level(&self) -> i8 {0} + fn log(&self, _: LogEvent) {} +} + + +pub trait Log { + fn enabled(&self, level: Level) -> bool; + fn log(&self, event: LogEvent); + fn level(&self) -> i8; +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..0761d80 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,47 @@ + +use std::collections::HashMap; + +#[macro_use] +mod logger; +mod cli; +mod types; +mod lexer; +pub mod parser; +mod compiler; + +fn main() { + let cli_args = cli::CliArgs::parse_with_passthrough(); + logger::Logger::init(&cli_args).expect("Failed to init logger"); + + let mut prog_map = HashMap::new(); + for file in &cli_args.input { + let mut lexer = lexer::Lexer::new(); + + info!("Lexing file {file:?}"); + if let Err(_) = lexer.lex(file.as_std_path()) { + error!("Lexing failed, exiting"); + return; + } + + // for t in &lexer.tokens { + // info!({loc => t.loc.clone()}, "{:?}", 
t.typ); + // } + // dbg!(&lexer.tokens); + + info!("Parsing file {file:?}"); + let prog = match parser::parse(&cli_args, &mut lexer.tokens) { + Ok(r) => r, + Err(_) => { + error!("Parsing failed, exiting"); + return; + } + }; + + prog_map.insert(file.as_std_path(), prog); + + } + if let Err(_) = compiler::compile_program(&cli_args, prog_map) { + error!("Failed to compile program, exiting"); + } + +} diff --git a/src/parser/builtin.rs b/src/parser/builtin.rs new file mode 100644 index 0000000..4bb61e5 --- /dev/null +++ b/src/parser/builtin.rs @@ -0,0 +1,46 @@ +use std::collections::HashMap; + +use lazy_static::lazy_static; + +use crate::types::{ast::{AstNode, Constant, Module, Program}, common::Loc}; + + +lazy_static!( + static ref DEFAULT_CONSTANTS: HashMap<&'static str, AstNode> = { + let mut h = HashMap::new(); + // No bsd cause im not about to create 3 or 4 diffrent compilation targets + h.insert("__WINDOWS", AstNode::Int(Loc::default(), cfg!(target_os = "windows") as usize)); + h.insert("__LINUX", AstNode::Int(Loc::default(), cfg!(target_os = "linux") as usize)); + h.insert("__ENDIAN_LITTLE", AstNode::Int(Loc::default(), cfg!(target_endian="little") as usize)); + h.insert("__ENDIAN_BIG", AstNode::Int(Loc::default(), cfg!(target_endian="big") as usize)); + + + h + }; +); + + + +pub fn get_builtin_symbols(prog: &mut Program) -> AstNode { + let mut md = Module { + loc: Loc::new(String::from("BUILTIN"), 0, 0), + path: vec![String::from("builtin")], + ident: String::from("BUILTIN"), + body: Vec::new(), + }; + + + + for (k, v) in DEFAULT_CONSTANTS.iter() { + let c = Constant { + loc: Loc::default(), + ident: k.to_string(), + value: Box::from(v.clone()), + }; + prog.constants.insert(k.to_string(), c.clone()); + md.body.push(AstNode::Constant(c)); + } + + + AstNode::Module(md) +} \ No newline at end of file diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..0140bc8 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,551 @@ +mod utils; 
+mod precompiler; +mod builtin; + +use std::{collections::HashMap, path::Path}; + +use anyhow::{bail, Result}; + +use crate::{cli::CliArgs, lexer::Lexer, types::{ast::{AstNode, Block, ConstUse, Constant, FnCall, Function, If, MemUse, Module, Program, While}, common::Loc, token::{InstructionType, KeywordType, Token, TokenType}}}; + +use self::{builtin::get_builtin_symbols, precompiler::precompile, utils::{expect, peek_check, peek_check_multiple, PeekResult}}; + + +bitflags::bitflags! { + struct Flags: u8 { + const EXTERN = 1 << 0; + const EXPORT = 1 << 1; + const INLINE = 1 << 2; + } +} + +//TODO: Implement Module paths +pub fn parse(cli_args: &CliArgs, tokens: &mut Vec) -> Result { + tokens.reverse(); + let module = Module { + loc: Loc::new(&tokens[0].loc.file, 0, 0), + ident: Path::new(&tokens[0].loc.file).file_stem().expect("Something went horribly wrong").to_string_lossy().to_string(), + body: Vec::new(), + path: vec![], + }; + + + let mut prog = Program { + ast: AstNode::Module(module.clone()), + functions: HashMap::new(), + constants: HashMap::new(), + memories: HashMap::new(), + + }; + + let syms = get_builtin_symbols(&mut prog); + match &mut prog.ast { + AstNode::Module(module) => { + module.body.push(syms) + } + _ => unreachable!() + } + + while !tokens.is_empty() { + let node = parse_next(cli_args, &mut prog, tokens, Flags::empty(), true)?; + match &mut prog.ast { + AstNode::Module(module) => { + module.body.push(node); + } + _ => unreachable!() + } + } + + // prog.ast = module; + + Ok(prog) +} + +fn parse_next(cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec, flags: Flags, is_module_root: bool) -> Result { + let token = tokens.pop().expect("We broke reality!"); + // debug!({loc => token.loc.clone()}, "t: {:?}", token.typ); + let ret = match &token.typ { + TokenType::Keyword(kw) => { + match kw { + KeywordType::If => parse_if(&token, cli_args, prog, tokens)?, + KeywordType::While => parse_while(&token, cli_args, prog, tokens)?, + 
KeywordType::Include => parse_include(&token, cli_args, prog, tokens)?, //TODO: implement include + KeywordType::Memory => todo!(), + KeywordType::Constant => parse_const(&token, cli_args, prog, tokens)?, + KeywordType::Function => parse_function(&token, cli_args, prog, tokens, flags)?, + KeywordType::Struct => todo!(), + KeywordType::Inline => parse_inline(&token, cli_args, prog, tokens, flags)?, + KeywordType::Export => parse_export(&token, cli_args, prog, tokens, flags)?, + KeywordType::Extern => parse_extern(&token, cli_args, prog, tokens, flags)?, + kw => { + dbg!(&prog.constants); + error!({loc => token.loc}, "Unexpected token {kw:?}"); + bail!("") + } + } + }, + TokenType::Instruction(it) => { + if is_module_root { + error!({loc => token.loc}, "Unexpected token {it:?}, please create a main function, this is not a scripting language"); + bail!("") + } else { + AstNode::Token(token) + } + }, + TokenType::Unknown(ut) => { + if is_module_root { + error!({loc => token.loc}, "Unexpected token {ut:?}, please create a main function, this is not a scripting language"); + bail!("") + } else { + // AstNode::Token(token) + parse_unknown(&token, cli_args, prog, tokens, flags)? 
+ } + }, + }; + Ok(ret) +} + +// TODO: Extern functions +fn parse_function(org: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec, flags: Flags ) -> Result { + + + let name = expect(tokens, TokenType::Unknown(String::new()))?; + expect(tokens, TokenType::Keyword(KeywordType::With))?; + let mut args = Vec::new(); + + loop { + if let PeekResult::Correct(t) = peek_check_multiple(tokens, vec![ + TokenType::Instruction(InstructionType::TypeAny), + TokenType::Instruction(InstructionType::TypeBool), + TokenType::Instruction(InstructionType::TypeInt), + TokenType::Instruction(InstructionType::TypePtr), + TokenType::Instruction(InstructionType::TypeVoid), + ]) { + args.push(t.typ.clone()); + } else { + break; + } + tokens.pop(); + } + + expect(tokens, TokenType::Keyword(KeywordType::Returns))?; + + let mut ret_args = Vec::new(); + + loop { + if let PeekResult::Correct(t) = peek_check_multiple(tokens, vec![ + TokenType::Instruction(InstructionType::TypeAny), + TokenType::Instruction(InstructionType::TypeBool), + TokenType::Instruction(InstructionType::TypeInt), + TokenType::Instruction(InstructionType::TypePtr), + TokenType::Instruction(InstructionType::TypeVoid), + ]) { + ret_args.push(t.typ.clone()); + } else { + break; + } + tokens.pop(); + } + + + expect(tokens, TokenType::Keyword(KeywordType::Then))?; + let mut body = Vec::new(); + loop { + + let fn_got = peek_check(tokens, TokenType::Keyword(KeywordType::Done)); + match fn_got { + PeekResult::Correct(_) => break, + PeekResult::Wrong(_) => (), + PeekResult::None => panic!("idk what to do herre"), + } + body.push(parse_next(cli_args, prog, tokens, Flags::empty(), false)?); + } + expect(tokens, TokenType::Keyword(KeywordType::Done))?; + + let fn_def = Function { + loc: org.loc(), + inline: flags.contains(Flags::INLINE), + extrn: flags.contains(Flags::EXTERN), + export: flags.contains(Flags::EXPORT), + ident: name.lexem.clone(), + arg_types: args, + ret_types: ret_args, + body, + }; + //TODO: Support module 
paths without double definitions + // let mut mp = match &prog.ast { + // AstNode::Module(m) => { + // m.path.clone() + // } + // _ => panic!("") + // }; + // mp.push(name.lexem.clone()); + // let mp = mp.join("::"); + + // prog.function_aliases.insert(mp, name.lexem.clone()); + prog.functions.insert(name.lexem.clone(), fn_def.clone()); + Ok(AstNode::Function(fn_def)) +} + +fn parse_if(org: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec) -> Result { + let mut test: Vec = Vec::new(); + let mut body: Vec = Vec::new(); + let mut els: Vec = Vec::new(); + loop { + test.push(parse_next(cli_args, prog, tokens, Flags::empty(), false)?); + match peek_check(tokens, TokenType::Keyword(KeywordType::Do)) { + PeekResult::Correct(_) => break, + PeekResult::Wrong(w) => { + match w.typ { + TokenType::Keyword(KeywordType::Then) => { + warn!("If is defined as `if ... do ... done`"); + } + _ => () + } + }, + PeekResult::None => panic!("idk what to do herre"), + } + } + + expect(tokens, TokenType::Keyword(KeywordType::Do))?; + + + + loop { + body.push(parse_next(cli_args, prog, tokens, Flags::empty(), false)?); + match peek_check_multiple(tokens, vec![ + TokenType::Keyword(KeywordType::Else), + TokenType::Keyword(KeywordType::Done), + ]) { + PeekResult::Correct(_) => break, + PeekResult::Wrong(_) => (), + PeekResult::None => panic!("idk what to do herre"), + } + } + + + let els_t = tokens.last().expect("IMPOSSIBLEEE!!!!!!111").clone(); + let els = match els_t.typ.clone() { + TokenType::Keyword(kw) => { + match kw { + KeywordType::Done => { + expect(tokens, TokenType::Keyword(KeywordType::Done))?; + AstNode::Block(Block{ + comment: String::new(), + loc: els_t.loc, + body: Vec::new(), + }) + }, + KeywordType::Else => { + expect(tokens, TokenType::Keyword(KeywordType::Else))?; + if peek_check(tokens, TokenType::Keyword(KeywordType::If)).correct() { + let if_org =expect(tokens, TokenType::Keyword(KeywordType::If))?; + parse_if(&if_org, cli_args, prog, tokens)? 
+ } else { + loop { + els.push(parse_next(cli_args, prog, tokens, Flags::empty(), false)?); + match peek_check(tokens, TokenType::Keyword(KeywordType::Done)) { + PeekResult::Correct(_) => break, + PeekResult::Wrong(w) => { + match w.typ { + TokenType::Keyword(KeywordType::Then) => { + warn!("If is defined as `if ... do ... done`"); + } + _ => () + } + }, + PeekResult::None => panic!("idk what to do herre"), + } + } + expect(tokens, TokenType::Keyword(KeywordType::Done))?; + + AstNode::Block(Block{ + comment: String::new(), + loc: els_t.loc, + body: els, + }) + } + }, + e => { + error!({loc => els_t.loc.clone()}, "Expected {:?} or {:?} but got {:?}", KeywordType::Done, KeywordType::Else, e); + bail!(""); + } + } + }, + e => { + error!({loc => els_t.loc.clone()}, "Expected {:?} or {:?} but got {:?}", KeywordType::Done, KeywordType::Else, e); + bail!(""); + } + }; + Ok(AstNode::If(If{ + test, + body, + els: Box::new(els), + loc: org.loc(), + })) +} + + +fn parse_while(org: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec) -> Result { + let mut test: Vec = Vec::new(); + let mut body: Vec = Vec::new(); + loop { + test.push(parse_next(cli_args, prog, tokens, Flags::empty(), false)?); + match peek_check(tokens, TokenType::Keyword(KeywordType::Do)) { + PeekResult::Correct(_) => break, + PeekResult::Wrong(w) => { + match w.typ { + TokenType::Keyword(KeywordType::Then) => { + warn!("while is defined as `while ... do ... 
done`"); + } + _ => () + } + }, + PeekResult::None => panic!("idk what to do herre"), + } + } + + expect(tokens, TokenType::Keyword(KeywordType::Do))?; + + + + loop { + body.push(parse_next(cli_args, prog, tokens, Flags::empty(), false)?); + match peek_check_multiple(tokens, vec![ + TokenType::Keyword(KeywordType::Else), + TokenType::Keyword(KeywordType::Done), + ]) { + PeekResult::Correct(_) => break, + PeekResult::Wrong(_) => (), + PeekResult::None => panic!("idk what to do herre"), + } + } + + + expect(tokens, TokenType::Keyword(KeywordType::Done))?; + + Ok(AstNode::While(While{ + test, + body, + loc: org.loc(), + })) +} + +fn parse_inline(_: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec, flags: Flags) -> Result { + let allowed_tokens = vec!{ + TokenType::Keyword(KeywordType::Function) + }; + + + let Some(t) = tokens.last() else { + error!("Expected one of {:?} after {:?} but found nothing", allowed_tokens, TokenType::Keyword(KeywordType::Inline)); + bail!("") + }; + + + let mut found = false; + + for at in &allowed_tokens { + if utils::cmp(at, &t.typ) { + found = true; + } + } + + if !found { + error!({loc => t.loc.clone()}, "Expected one of {:?} after {:?} but found {:?}", allowed_tokens, TokenType::Keyword(KeywordType::Inline), t.typ); + bail!(""); + } + + + parse_next(cli_args, prog, tokens, flags | Flags::INLINE, false) +} + +fn parse_extern(_: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec, flags: Flags) -> Result { + let allowed_tokens = vec!{ + TokenType::Keyword(KeywordType::Function), + TokenType::Keyword(KeywordType::Constant), + TokenType::Keyword(KeywordType::Memory), + }; + + + let Some(t) = tokens.last() else { + error!("Expected one of {:?} after {:?} but found nothing", allowed_tokens, TokenType::Keyword(KeywordType::Extern)); + bail!("") + }; + + + let mut found = false; + + for at in &allowed_tokens { + if utils::cmp(at, &t.typ) { + found = true; + } + } + + if !found { + error!({loc => t.loc.clone()}, 
"Expected one of {:?} after {:?} but found {:?}", allowed_tokens, TokenType::Keyword(KeywordType::Extern), t.typ); + bail!(""); + } + + + parse_next(cli_args, prog, tokens, flags | Flags::EXTERN, false) +} + +fn parse_export(_: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec, flags: Flags) -> Result { + let allowed_tokens = vec!{ + TokenType::Keyword(KeywordType::Function), + TokenType::Keyword(KeywordType::Constant), + TokenType::Keyword(KeywordType::Memory), + }; + + + let Some(t) = tokens.last() else { + error!("Expected one of {:?} after {:?} but found nothing", allowed_tokens, TokenType::Keyword(KeywordType::Export)); + bail!("") + }; + + + let mut found = false; + + for at in &allowed_tokens { + if utils::cmp(at, &t.typ) { + found = true; + } + } + + if !found { + error!({loc => t.loc.clone()}, "Expected one of {:?} after {:?} but found {:?}", allowed_tokens, TokenType::Keyword(KeywordType::Export), t.typ); + bail!(""); + } + + + parse_next(cli_args, prog, tokens, flags | Flags::EXPORT, false) +} + + +fn parse_include(_: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec) -> Result { + let path = expect(tokens, + TokenType::Instruction( + InstructionType::PushStr( + String::new() + ) + ) + )?; + + for ip in &cli_args.include_path { + let p = ip.join(&path.lexem).to_path_buf(); + if p.exists() { + info!({loc => path.loc.clone()}, "Lexing file {}", path.lexem.clone()); + let mut lexer = Lexer::new(); + lexer.lex(p.as_std_path())?; + + let mut mod_tokens = lexer.tokens; + + mod_tokens.reverse(); + + let mut mp = match &prog.ast { + AstNode::Module(m) => { + m.path.clone() + } + _ => panic!("") + }; + + mp.push(p.file_stem().unwrap().to_string()); + + let module = Module { + loc: Loc::new(path.loc.file.clone(), 0, 0), + ident: Path::new(&path.loc.file).file_stem().expect("Something went horribly wrong").to_string_lossy().to_string(), + body: Vec::new(), + path: mp, + }; + + + let mut mod_prog = Program { + ast: 
AstNode::Module(module), + functions: prog.functions.clone(), + constants: prog.constants.clone(), + memories: prog.memories.clone(), + + }; + + info!({loc => path.loc.clone()}, "Parsing file {}", path.lexem.clone()); + while !mod_tokens.is_empty() { + let node = parse_next(cli_args, &mut mod_prog, &mut mod_tokens, Flags::empty(), true)?; + match &mut mod_prog.ast { + AstNode::Module(module) => { + module.body.push(node); + } + _ => unreachable!() + } + } + + prog.constants = mod_prog.constants; + prog.functions = mod_prog.functions; + prog.memories = mod_prog.memories; + return Ok(mod_prog.ast) + } + + }; + + error!("Could not find file {:?} in these locations: {:?}", path.lexem, cli_args.include_path); + bail!("") + +} + +fn parse_const(org: &Token, cli_args: &CliArgs, prog: &mut Program, tokens: &mut Vec) -> Result { + let name = expect(tokens, TokenType::Unknown(String::new()))?; + + + let mut body = Vec::new(); + loop { + + let t = peek_check(tokens, TokenType::Keyword(KeywordType::End)); + match t { + PeekResult::Correct(_) => break, + PeekResult::Wrong(_) => (), + PeekResult::None => panic!("idk what to do herre"), + } + body.push(parse_next(cli_args, prog, tokens, Flags::empty(), false)?); + } + expect(tokens, TokenType::Keyword(KeywordType::End))?; + + let val = precompile(prog, body, &mut Vec::new())?; + + let name = name.lexem.clone() + .replace("(", "_OPRN_") + .replace(")", "_CPRN_"); + + let def = Constant{ + loc: org.loc(), + ident: name.clone(), + value: Box::new(val), + }; + + + prog.constants.insert(name, def.clone()); + + Ok(AstNode::Constant(def)) +} + +fn parse_unknown(org: &Token, _: &CliArgs, prog: &mut Program, _: &mut Vec, _: Flags ) -> Result { + //TODO: Typing? 
+ if let Some(func) = prog.functions.get(&org.lexem) { + if func.inline { + return Ok(AstNode::Block(Block{ loc: org.loc.clone(), body: func.body.clone(), comment: format!("inline fn {}", func.ident) })) + } else { + return Ok(AstNode::FnCall(FnCall{ loc: org.loc.clone(), ident: org.lexem.clone() })); + } + } + + if let Some(_) = prog.constants.get(&org.lexem) { + return Ok(AstNode::ConstUse(ConstUse{ loc: org.loc.clone(), ident: org.lexem.clone() })); + } + + if let Some(_) = prog.memories.get(&org.lexem) { + return Ok(AstNode::MemUse(MemUse{ loc: org.loc.clone(), ident: org.lexem.clone() })); + } + + dbg!(&prog.constants); + error!({loc => org.loc.clone()}, "Unknown token {:?}", org); + bail!("") +} \ No newline at end of file diff --git a/src/parser/precompiler.rs b/src/parser/precompiler.rs new file mode 100644 index 0000000..a2e0ee1 --- /dev/null +++ b/src/parser/precompiler.rs @@ -0,0 +1,154 @@ +use anyhow::bail; + +use crate::types::{ast::{AstNode, Program}, common::Loc, token::{InstructionType, TokenType}}; + + +pub fn precompile(prog: &Program, ast: Vec, stack: &mut Vec ) -> anyhow::Result { + for node in ast.clone() { + match &node { + AstNode::ConstUse(c) => { + let Some(val) = prog.constants.get(&c.ident) else { + error!({loc => c.loc.clone()}, "Unknown constant {:?}", c.ident) ; + bail!("") + }; + match Box::leak(val.value.clone()) { + t @ AstNode::Int(..) => { + return Ok(t.clone()); + } + + t @ AstNode::Str(..) => { + return Ok(t.clone()); + } + + t @ AstNode::CStr(..) => { + return Ok(t.clone()); + } + + t @ AstNode::Char(..) 
=> { + return Ok(t.clone()); + } + + + // AstNode::Token(t) => { + // match t.typ.clone() { + // TokenType::Instruction(it) => { + // match it { + // InstructionType::PushInt(i) => stack.push(i), + // InstructionType::PushCStr(_) => { + // //TODO: Handle this better + // return Ok(AstNode::Token(t.clone())); + // }, + // InstructionType::PushChar(_) => { + // //TODO: Handle this better + // return Ok(AstNode::Token(t.clone())); + // }, + // _ => panic!() + // } + // }, + // _ => panic!() + // } + // }, + _ => panic!() + } + + }, + AstNode::Token(t) => { + match t.typ.clone() { + TokenType::Keyword(_) => { + error!({loc => t.loc.clone()}, "Unsupported token {t:?}, we dont support precompilation of this") ; + bail!("") + }, + TokenType::Instruction(it) => { + match it { + InstructionType::PushInt(i) => { + stack.push(i); + }, + InstructionType::PushCStr(s) => { + //TODO: Handle this better + return Ok(AstNode::CStr(t.loc.clone(), s)); + }, + InstructionType::PushStr(s) => { + //TODO: Handle this better + return Ok(AstNode::Str(t.loc.clone(), s)); + }, + InstructionType::PushChar(c) => { + //TODO: Handle this better + return Ok(AstNode::Char(t.loc.clone(), c)); + }, + InstructionType::Minus => { + let a = stack_pop(stack, &t.loc)?; + let b = stack_pop(stack, &t.loc)?; + stack.push(b - a); + }, + InstructionType::Plus => { + let a = stack_pop(stack, &t.loc)?; + let b = stack_pop(stack, &t.loc)?; + stack.push(b + a); + }, + InstructionType::DivMod => { + let a = stack_pop(stack, &t.loc)?; + let b = stack_pop(stack, &t.loc)?; + stack.push(b / a); + stack.push(b % a); + }, + InstructionType::Mul => { + let a = stack_pop(stack, &t.loc)?; + let b = stack_pop(stack, &t.loc)?; + stack.push(b * a); + }, + InstructionType::Drop => { + stack_pop(stack, &t.loc)?; + }, + //TODO: Support these later + // InstructionType::Dup => todo!(), + // InstructionType::Rot => todo!(), + // InstructionType::Over => todo!(), + // InstructionType::Swap => todo!(), + // InstructionType::Equals => 
todo!(), + // InstructionType::Gt => todo!(), + // InstructionType::Lt => todo!(), + // InstructionType::Ge => todo!(), + // InstructionType::Le => todo!(), + // InstructionType::NotEquals => todo!(), + // InstructionType::Band => todo!(), + // InstructionType::Bor => todo!(), + // InstructionType::Shr => todo!(), + // InstructionType::Shl => todo!(), + //TODO: Support this when we have types + // InstructionType::CastBool => todo!(), + // InstructionType::CastPtr => todo!(), + // InstructionType::CastInt => todo!(), + // InstructionType::CastVoid => todo!(), + InstructionType::ConstUse => unreachable!(), + _ => { + error!({loc => t.loc.clone()}, "Unsupported token {t:?}, we dont support precompilation of this") ; + bail!("") + } + } + }, + TokenType::Unknown(_) => todo!(), + } + }, + //TODO: Implement these + t @ AstNode::If { .. } | + t @ AstNode::While { .. } | + t => { + error!({loc => t.loc()}, "Unsupported token {t:?}, we dont support precompilation of this") ; + bail!("") + } + } + + } + + Ok(AstNode::Int(ast[0].loc(), stack[0])) +} + +fn stack_pop(stack: &mut Vec, loc: &Loc) -> anyhow::Result { + match stack.pop() { + Some(i) => Ok(i), + None => { + error!({loc => loc.clone()}, "Failed to precompile tokens, failed to pop from stack"); + bail!("") + }, + } +} \ No newline at end of file diff --git a/src/parser/utils.rs b/src/parser/utils.rs new file mode 100644 index 0000000..9b08b88 --- /dev/null +++ b/src/parser/utils.rs @@ -0,0 +1,100 @@ +use anyhow::{bail, Result}; + +use crate::types::token::{Token, TokenType}; + +#[derive(Debug, Clone, PartialEq, PartialOrd)] +pub enum PeekResult { + Correct(T), + Wrong(T), + None +} + +impl PeekResult { + pub fn correct(&self) -> bool{ + match self { + PeekResult::Correct(_) => true, + _ => false + } + } + #[allow(dead_code)] + pub fn wrong(&self) -> bool{ + match self { + PeekResult::Wrong(_) => true, + _ => false + } + } + + #[allow(dead_code)] + pub fn none(&self) -> bool{ + match self { + PeekResult::None => true, 
+ _ => false + } + } +} + +pub fn cmp(lhs: &TokenType, rhs: &TokenType) -> bool { + match (lhs, rhs) { + (TokenType::Keyword(lhs), TokenType::Keyword(rhs)) => { + std::mem::discriminant(lhs) == std::mem::discriminant(rhs) + }, + (TokenType::Instruction(lhs), TokenType::Instruction(rhs)) => { + std::mem::discriminant(lhs) == std::mem::discriminant(rhs) + }, + (TokenType::Unknown(_), TokenType::Unknown(_)) => true, + _ => false + } +} + +pub fn peek_check_multiple(tokens: &Vec, typs: Vec) -> PeekResult<&Token>{ + let t = tokens.last(); + + if let Some(t) = t { + for tt in typs { + if cmp(&t.typ, &tt) { + return PeekResult::Correct(t); + } + } + PeekResult::Wrong(t) + } else { + PeekResult::None + } +} + +pub fn peek_check(tokens: &Vec, typ: TokenType) -> PeekResult<&Token> { + let t = tokens.last(); + + match t { + Some(t) => { + //? Source: https://doc.rust-lang.org/std/mem/fn.discriminant.html + if cmp(&t.typ, &typ) { + PeekResult::Correct(t) + } else { + PeekResult::Wrong(t) + } + }, + None => { + PeekResult::None + } + } +} + +pub fn expect(tokens: &mut Vec, typ: TokenType) -> Result { + let t = tokens.pop(); + + match t { + Some(t) => { + //? 
Source: https://doc.rust-lang.org/std/mem/fn.discriminant.html + if std::mem::discriminant(&t.typ) != std::mem::discriminant(&typ) { + error!("Expected {:?}, but got {:?}", typ, t.typ); + bail!("") + } + Ok(t) + }, + None => { + error!("Expected {:?}, but found nothing", typ); + bail!("") + } + } + +} \ No newline at end of file diff --git a/src/types/ast/mod.rs b/src/types/ast/mod.rs new file mode 100644 index 0000000..79b7cd7 --- /dev/null +++ b/src/types/ast/mod.rs @@ -0,0 +1,148 @@ +use std::collections::HashMap; + +use super::{common::Loc, token::{Token, TokenType}}; + + +//TODO: Implement missing stuff +#[derive(Debug, Clone)] +pub enum AstNode { + Int(Loc, usize), + Str(Loc, String), + CStr(Loc, String), + Char(Loc, char), + // ExternFnDef { + // loc: Loc, + // ident: String, + // arg_types: Vec, + // ret_type: TokenType, + // }, + Function(Function), + Constant(Constant), + // ExternConstantDef{ + // loc: Loc, + // ident: String, + // value: InstructionType + // }, + // Struct{ + // loc: Loc, + // ident: String, + // body: Vec<(String, usize)> // (field ident, size in bytes) + // }, + // StructDef{ + // loc: Loc, + // extrn: bool, + // ident: String, + // body: Vec<(String, usize)> // (field ident, size in bytes) + // }, + If(If), + While(While), + Module(Module), + Memory(Memory), + MemUse(MemUse), + ConstUse(ConstUse), + FnCall(FnCall), + Block(Block), + Token(Token), +} + +impl AstNode { + pub fn loc(&self) -> Loc { + match self { + AstNode::Function(f) => f.loc.clone(), + AstNode::Constant(c) => c.loc.clone(), + AstNode::If(t)=> t.loc.clone(), + AstNode::While(t)=> t.loc.clone(), + AstNode::Module(m) => m.loc.clone(), + AstNode::Memory(m) => m.loc.clone(), + AstNode::MemUse(t)=> t.loc.clone(), + AstNode::ConstUse(t)=> t.loc.clone(), + AstNode::FnCall(t)=> t.loc.clone(), + AstNode::Block(t)=> t.loc.clone(), + AstNode::Token(tok) => tok.loc.clone(), + AstNode::Int(loc, _) => loc.clone(), + AstNode::Str(loc, _) => loc.clone(), + AstNode::CStr(loc, _) => 
loc.clone(), + AstNode::Char(loc, _) => loc.clone(), + } + } +} + +#[derive(Debug, Clone)] +pub struct MemUse { + pub loc: Loc, + pub ident: String, +} +#[derive(Debug, Clone)] +pub struct ConstUse { + pub loc: Loc, + pub ident: String, +} +#[derive(Debug, Clone)] +pub struct FnCall { + pub loc: Loc, + pub ident: String, +} +#[derive(Debug, Clone)] +pub struct Block { + pub comment: String, + pub loc: Loc, + pub body: Vec +} + +#[derive(Debug, Clone)] +pub struct While { + pub loc: Loc, + pub test: Vec, + pub body: Vec, +} + +#[derive(Debug, Clone)] +pub struct If { + pub loc: Loc, + pub test: Vec, + pub body: Vec, + pub els: Box, +} + +#[derive(Debug, Clone)] +pub struct Module { + pub loc: Loc, + pub path: Vec, + pub ident: String, + pub body: Vec +} + +#[derive(Debug, Clone)] +pub struct Function { + pub loc: Loc, + pub ident: String, + pub inline: bool, + pub extrn: bool, + pub export: bool, + pub arg_types: Vec, + pub ret_types: Vec, + pub body: Vec +} + +#[derive(Debug, Clone)] +pub struct Constant { + pub loc: Loc, + pub ident: String, + pub value: Box +} + +#[derive(Debug, Clone)] +pub struct Memory { + pub loc: Loc, + pub ident: String, + pub size: usize // bytes +} + + +#[derive(Debug, Clone)] +pub struct Program { + pub ast: AstNode, + pub functions: HashMap, + pub constants: HashMap, + pub memories: HashMap, +} \ No newline at end of file diff --git a/src/types/common.rs b/src/types/common.rs new file mode 100644 index 0000000..1ac7443 --- /dev/null +++ b/src/types/common.rs @@ -0,0 +1,37 @@ +use std::fmt::Display; + + +#[derive(Debug, Clone, Default, PartialEq)] +pub struct Loc { + pub file: String, + pub line: usize, + pub col: usize +} + + +impl Loc { + pub fn new>(f: T, line: usize, col: usize) -> Self { + Self { + file: f.into(), + line, + col, + } + } + pub fn inc_line(&mut self) { + self.line += 1; + self.col = 0; + } + + pub fn inc_col(&mut self) { + self.col += 1; + } +} + +impl Display for Loc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) 
-> std::fmt::Result { + write!(f, "{}:{}:{}", self.file, self.line, self.col)?; + Ok(()) + } +} + + diff --git a/src/types/mod.rs b/src/types/mod.rs new file mode 100644 index 0000000..286aa7f --- /dev/null +++ b/src/types/mod.rs @@ -0,0 +1,3 @@ +pub mod common; +pub mod token; +pub mod ast; diff --git a/src/types/token/mod.rs b/src/types/token/mod.rs new file mode 100644 index 0000000..c757cf3 --- /dev/null +++ b/src/types/token/mod.rs @@ -0,0 +1,120 @@ +#![allow(dead_code)] + +use super::common::Loc; + +#[derive(Debug, Clone, PartialEq)] +pub enum InstructionType { + + // stack + PushInt(usize), + PushStr(String), + PushCStr(String), + PushChar(char), + Drop, + Print, + Dup, + Rot, // a b c => b c a + Over, // a b => a b a + Swap, // a b => b a + + // math + Minus, + Plus, + Equals, + Gt, + Lt, + Ge, + Le, + NotEquals, + Band, // & + Bor, // | + Shr, // >> + Shl, // << + DivMod, // / + Mul, + + + // mem + Read8, + Write8, + Read32, + Write32, + Read64, + Write64, + + // syscalls + Syscall0, + Syscall1, + Syscall2, + Syscall3, + Syscall4, + Syscall5, + Syscall6, + + CastBool, + CastPtr, + CastInt, + CastVoid, + + // typing + TypeBool, + TypePtr, + TypeInt, + TypeVoid, + // TypeStr, + TypeAny, + + FnCall, + MemUse, + ConstUse, + + Return, +} +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum KeywordType { + If, + Else, + End, + While, + Do, + Include, + Memory, + Constant, + Function, + Then, + Done, + Struct, + Inline, + Export, + Extern, + Returns, + With, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum TokenType { + Keyword(KeywordType), + Instruction(InstructionType), + Unknown(String) +} + + +#[derive(Debug, Clone, PartialEq)] +pub struct Token { + pub typ: TokenType, + pub loc: Loc, + pub lexem: String, +} + +impl Token { + pub fn new(typ: TokenType, loc: Loc, lexem: String) -> Self { + Self { + typ, + loc, + lexem, + } + } + pub fn loc(&self) -> Loc { + self.loc.clone() + } +} \ No newline at end of file diff --git a/test.mcl b/test.mcl new file mode 
100644 index 0000000..4e0980e --- /dev/null +++ b/test.mcl @@ -0,0 +1,39 @@ +include "std.mcl" + +// structdef Foo do +// buz do sizeof(u64) end +// baz do sizeof(u64) end +// done + +// memory s_foo Foo end + +//? Comments :3 + +// extern fn a with void returns void then done +// inline fn b with void returns void then done +// export fn c with void returns void then done + +// fn putd with int returns void then drop done + +fn main with int ptr returns int then + // 1 2 add + // 69 _dbg_print + "Hewo\n" puts + + // if 3 4 eq do + // "omg what impossible!\n" + // else if 1 1 eq do + // "whaaaaaaaaa\n" + // else + // "finally, some good soup\n" + // done + // puts + + // 10 + // while dup 0 gt do + // "uwu" puts + // dup _dbg_print + // 1 sub + // done + +done