From 9176c4882f2c5e8e8a09c2aaf05afb94b5b3548a Mon Sep 17 00:00:00 2001
From: Kittycannon <alejandro.cervera@sintekanalytics.com>
Date: Sat, 20 Jun 2026 11:53:08 -0600
Subject: [PATCH] beginning of compiler

---
 .gitignore                                    |   6 +
 pygen.ipynb                                   | 302 +++++++
 samples/assembly.ebnf                         |  76 ++
 samples/factorial.spasm                       |  11 +
 spider-compiler.code-workspace                |  21 +
 spider/compiler/assembly/AssemblyParser.hpp   | 812 ++++++++++++++++++
 src/spider/compiler/Compiler.cpp              |   0
 src/spider/compiler/Compiler.hpp              |  10 +
 src/spider/compiler/assembler/Assembler.cpp   |  35 +
 src/spider/compiler/assembler/Assembler.hpp   |  52 ++
 .../compiler/assembler/Disassembler.hpp       |  11 +
 .../compiler/assembler/Dissasembler.cpp       |   0
 src/spider/compiler/common.hpp                |  55 ++
 src/spider/compiler/text/TextReader.cpp       | 104 +++
 src/spider/compiler/text/TextReader.hpp       |  91 ++
 src/spider/compiler/text/utf8.hpp             |  91 ++
 src/spider/compiler/tokens/RootToken.cpp      |   0
 src/spider/compiler/tokens/RootToken.hpp      |  25 +
 src/spider/compiler/tokens/Token.cpp          |  26 +
 src/spider/compiler/tokens/Token.hpp          |  56 ++
 20 files changed, 1784 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 pygen.ipynb
 create mode 100644 samples/assembly.ebnf
 create mode 100644 samples/factorial.spasm
 create mode 100644 spider-compiler.code-workspace
 create mode 100644 spider/compiler/assembly/AssemblyParser.hpp
 create mode 100644 src/spider/compiler/Compiler.cpp
 create mode 100644 src/spider/compiler/Compiler.hpp
 create mode 100644 src/spider/compiler/assembler/Assembler.cpp
 create mode 100644 src/spider/compiler/assembler/Assembler.hpp
 create mode 100644 src/spider/compiler/assembler/Disassembler.hpp
 create mode 100644 src/spider/compiler/assembler/Dissasembler.cpp
 create mode 100644 src/spider/compiler/common.hpp
 create mode 100644 src/spider/compiler/text/TextReader.cpp
 create mode 100644 src/spider/compiler/text/TextReader.hpp
 create mode 100644 src/spider/compiler/text/utf8.hpp
 create mode 100644 src/spider/compiler/tokens/RootToken.cpp
 create mode 100644 src/spider/compiler/tokens/RootToken.hpp
 create mode 100644 src/spider/compiler/tokens/Token.cpp
 create mode 100644 src/spider/compiler/tokens/Token.hpp

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f07a0cd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+# For now, ignore user builds
+# We will eventually change to a custom
+# build system.
+# So hold on
+/bin
+/out
diff --git a/pygen.ipynb b/pygen.ipynb
new file mode 100644
index 0000000..cede54d
--- /dev/null
+++ b/pygen.ipynb
@@ -0,0 +1,302 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "id": "00e26c5b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lark import Lark, Transformer\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "id": "cc16be1a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ebnf_targets = {\n",
+    "    \"assembly\": {\n",
+    "        \"src\": \"./samples/assembly.ebnf\",\n",
+    "        \"dst\": \"./spider/compiler/assembly/AssemblyParser.hpp\",\n",
+    "        \"cnt\": None,\n",
+    "    },\n",
+    "}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "id": "e88d212f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "--- Loading EBNF Targets ---\n",
+      "✅ Success [assembly]: Loaded './samples/assembly.ebnf' -> Target destination: './spider/compiler/assembly/AssemblyParser.hpp'\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"\\n--- Loading EBNF Targets ---\")\n",
+    "for target_name, paths in ebnf_targets.items():\n",
+    "    src_path = paths[\"src\"]\n",
+    "    dst_path = paths[\"dst\"]\n",
+    "    \n",
+    "    try:\n",
+    "        with open(src_path, \"r\", encoding=\"utf-8\") as file:\n",
+    "            paths[\"cnt\"] = file.read()\n",
+    "            print(f\"✅ Success [{target_name}]: Loaded '{src_path}' -> Target destination: '{dst_path}'\")\n",
+    "        \n",
+    "    except FileNotFoundError:\n",
+    "        print(f\"❌ Error [{target_name}]: Source file not found at '{src_path}'\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 98,
+   "id": "e8095002",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from lark import Lark, Transformer\n",
+    "\n",
+    "iso_ebnf_meta_grammar = r\"\"\"\n",
+    "    start: rule+\n",
+    "    rule: RULE_NAME \"=\" expression \";\"\n",
+    "    \n",
+    "    ?expression: alternation\n",
+    "    alternation: sequence (\"|\" sequence)*\n",
+    "    \n",
+    "    sequence: item ( [\",\"] item )*\n",
+    "    \n",
+    "    ?item: atom\n",
+    "         | atom \"?\" -> optional\n",
+    "         | atom \"*\" -> repeat\n",
+    "         | \"[\" expression \"]\" -> optional\n",
+    "         | \"{\" expression \"}\" -> repeat\n",
+    "         \n",
+    "    ?atom: RULE_NAME -> call_rule\n",
+    "         | TERMINAL -> match_terminal\n",
+    "         | SPECIAL_SEQ -> handle_special\n",
+    "         | \"(\" expression \")\" -> group\n",
+    "\n",
+    "    RULE_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/\n",
+    "    TERMINAL: /\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"/ | /'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'/\n",
+    "    SPECIAL_SEQ: /\\?[\\s\\S]*?\\?/\n",
+    "    COMMENT: /\\(\\*([\\s\\S]*?)\\*\\)/\n",
+    "\n",
+    "    %import common.WS\n",
+    "    %ignore WS\n",
+    "    %ignore COMMENT\n",
+    "\"\"\"\n",
+    "\n",
+    "class AssemblyCppGenerator(Transformer):\n",
+    "    def start(self, rules):\n",
+    "        cpp_functions = \"\\n\\n\".join(rules)\n",
+    "        return f\"\"\"#pragma once\n",
+    "\n",
+    "#include <iostream>\n",
+    "#include <string>\n",
+    "#include <vector>\n",
+    "#include <stdexcept>\n",
+    "\n",
+    "class AssemblyParser {{\n",
+    "private:\n",
+    "    std::string src;\n",
+    "    size_t pos = 0;\n",
+    "\n",
+    "    std::string peek_str(size_t len) {{\n",
+    "        if (pos + len <= src.length()) return src.substr(pos, len);\n",
+    "        return src.substr(pos);\n",
+    "    }}\n",
+    "\n",
+    "    char peek() {{ return pos < src.length() ? src[pos] : '\\\\0'; }}\n",
+    "    \n",
+    "    void match_char(char expected) {{\n",
+    "        if (peek() == expected) pos++;\n",
+    "        else throw std::runtime_error(\"Unexpected token matching character\");\n",
+    "    }}\n",
+    "\n",
+    "    void match_string(std::string expected) {{\n",
+    "        if (peek_str(expected.length()) == expected) pos += expected.length();\n",
+    "        else throw std::runtime_error(\"Unexpected token matching string: \" + expected);\n",
+    "    }}\n",
+    "\n",
+    "    bool isUTF8Alpha() {{ return isalpha(peek()); }}\n",
+    "    bool isWhithespaceCharNotCrLf() {{ return peek() == ' ' || peek() == '\\\\t'; }}\n",
+    "    bool isUTF8CharNotCrLf() {{ return peek() != '\\\\r' && peek() != '\\\\n' && peek() != '\\\\0'; }}\n",
+    "    bool isUTF8CharLitCont() {{ return peek() != '\\'' && peek() != '\\\\\\\\'; }}\n",
+    "    bool isUTF8StringLitCont() {{ return peek() != '\"' && peek() != '\\\\\\\\'; }}\n",
+    "\n",
+    "public:\n",
+    "    AssemblyParser(std::string input) : src(input) {{}}\n",
+    "\n",
+    "    void parse() {{\n",
+    "        parse_program(); \n",
+    "        if (pos < src.length()) throw std::runtime_error(\"Trailing characters left unparsed.\");\n",
+    "        std::cout << \"Assembly source compiled cleanly!\" << std::endl;\n",
+    "    }}\n",
+    "\n",
+    "{cpp_functions}\n",
+    "}};\n",
+    "\"\"\"\n",
+    "\n",
+    "    def rule(self, args):\n",
+    "        name, expr = args\n",
+    "        return f\"    void parse_{name}() {{\\n{expr}\\n    }}\"\n",
+    "\n",
+    "    # FIX 1: Explicitly handle choice logic using C++ style paths\n",
+    "    def alternation(self, items):\n",
+    "        code_lines = []\n",
+    "        for i, item in enumerate(items):\n",
+    "            # Clean up padding whitespace if any\n",
+    "            clean_item = str(item).strip()\n",
+    "            if not clean_item: continue\n",
+    "            \n",
+    "            # Since lookahead processing requires FIRST sets, we scaffold a sequential fallback\n",
+    "            if i == 0:\n",
+    "                code_lines.append(f\"        if (/* option {i+1} */ true) {{\\n    {clean_item}\\n        }}\")\n",
+    "            else:\n",
+    "                code_lines.append(f\"        else if (/* option {i+1} */ true) {{\\n    {clean_item}\\n        }}\")\n",
+    "        return \"\\n\".join(code_lines)\n",
+    "\n",
+    "    def sequence(self, items):\n",
+    "        flattened_items = []\n",
+    "        for item in items:\n",
+    "            if isinstance(item, list):\n",
+    "                for sub_item in item:\n",
+    "                    if sub_item: flattened_items.append(str(sub_item).strip())\n",
+    "            elif item:\n",
+    "                flattened_items.append(str(item).strip())\n",
+    "        return \"\\n\".join(f\"        {item}\" for item in flattened_items if item)\n",
+    "\n",
+    "    def call_rule(self, token):\n",
+    "        rule_name = token[0].value if isinstance(token, list) else token.value\n",
+    "        return f\"parse_{rule_name}();\"\n",
+    "\n",
+    "    # FIX 2: Generate match_string instead of match_char for multi-char string keywords like \"include\"\n",
+    "    def match_terminal(self, token):\n",
+    "        raw_token_str = token[0].value if isinstance(token, list) else token.value\n",
+    "        raw_val = raw_token_str[1:-1]\n",
+    "        \n",
+    "        if raw_val == r\"\\r\": return \"match_char('\\\\r');\"\n",
+    "        if raw_val == r\"\\n\": return \"match_char('\\\\n');\"\n",
+    "        if raw_val == r\"\\t\": return \"match_char('\\\\t');\"\n",
+    "        if raw_val == r\"\\\\\": return \"match_char('\\\\\\\\');\"\n",
+    "        if not raw_val: return \"// Empty string match\"\n",
+    "        \n",
+    "        if len(raw_val) > 1:\n",
+    "            return f\"match_string(\\\"{raw_val}\\\");\"\n",
+    "        return f\"match_char('{raw_val}');\"\n",
+    "\n",
+    "    def handle_special(self, token):\n",
+    "        raw_string = token[0].value if isinstance(token, list) else token.value\n",
+    "        func_name = raw_string.strip('?').strip()\n",
+    "        return f\"if ({func_name}()) {{ pos++; }} else {{ throw std::runtime_error(\\\"Failed validation for {func_name}\\\"); }}\"\n",
+    "\n",
+    "    def optional(self, args):\n",
+    "        content = args[0] if not isinstance(args[0], list) else \"\\n        \".join(args[0])\n",
+    "        return f\"// Optional block\\n        if (/* lookahead check */ true) {{\\n    {content}\\n        }}\"\n",
+    "\n",
+    "    def repeat(self, args):\n",
+    "        content = args[0] if not isinstance(args[0], list) else \"\\n        \".join(args[0])\n",
+    "        return f\"// Repeat block\\n        while (/* lookahead check */ true) {{\\n    {content}\\n        }}\"\n",
+    "\n",
+    "    def group(self, args):\n",
+    "        # Flatten grouped elements cleanly to strings\n",
+    "        if isinstance(args, list):\n",
+    "            return \"\\n\".join(str(x) for x in args)\n",
+    "        return str(args)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "id": "558915ff",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--- Starting C++ Compilation Loop ---\n",
+      "Parsing and converting target rule sets for: assembly\n",
+      "🎉 Code generation complete! Output stored in './spider/compiler/assembly/AssemblyParser.hpp'\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"--- Starting C++ Compilation Loop ---\")\n",
+    "\n",
+    "try:\n",
+    "    meta_parser = Lark(iso_ebnf_meta_grammar, parser='lalr')\n",
+    "    \n",
+    "    for name, target in ebnf_targets.items():\n",
+    "        print(f\"Parsing and converting target rule sets for: {name}\")\n",
+    "        \n",
+    "        # Build the compiler AST tree from your exact text\n",
+    "        syntax_tree = meta_parser.parse(target[\"cnt\"])\n",
+    "        \n",
+    "        # Transform the AST structural nodes into pure C++ Source strings\n",
+    "        compiler_transformer = AssemblyCppGenerator()\n",
+    "        compiled_cpp_header = compiler_transformer.transform(syntax_tree)\n",
+    "        \n",
+    "        # Output directly to your destination path\n",
+    "        os.makedirs(os.path.dirname(target[\"dst\"]), exist_ok=True)\n",
+    "        with open(target[\"dst\"], \"w\", encoding=\"utf-8\") as f:\n",
+    "            f.write(compiled_cpp_header)\n",
+    "            \n",
+    "        print(f\"🎉 Code generation complete! Output stored in '{target['dst']}'\")\n",
+    "\n",
+    "except Exception as e:\n",
+    "    print(f\"❌ Failed to process custom architecture. Error details: \\n{e}\")\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "366688c3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cd1aca3f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/samples/assembly.ebnf b/samples/assembly.ebnf
new file mode 100644
index 0000000..849401e
--- /dev/null
+++ b/samples/assembly.ebnf
@@ -0,0 +1,76 @@
+(* Spider Assembly EBNF | Sintek Analytics @ 2026 | All Rights Reserved *)
+
+(* Characters & Structures *)
+letter          = ? isUTF8Alpha ? ;
+digit            = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+alpha_num_char   = letter | digit ;
+
+hex_digit        = digit | "A" | "B" | "C" | "D" | "E" | "F" | "a" | "b" | "c" | "d" | "e" | "f" ;
+octal_digit      = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" ;
+binary_digit     = "0" | "1" ;
+
+ws_char          = ? isWhithespaceCharNotCrLf ? ;
+ws_optional      = { ws_char } ;
+whitespace       = ws_char , { ws_char } ;
+newline          = "\r" | "\n" | "\r\n" ;
+utf8_char        = ? isUTF8CharNotCrLf ? ;
+
+char_escape      = "\\", utf8_char ;
+char_content     = char_escape | ? isUTF8CharLitCont ? ; (* Not ' or \ *)
+char_lit         = "'", char_content, "'" ;
+
+string_char      = char_escape | ? isUTF8StringLitCont ? ; (* Not " or \ *)
+string_lit       = '"', { string_char }, '"' ;
+
+(* Literals *)
+identifier       = ( letter | "_" ) , { alpha_num_char | "_" } ;
+comment          = ";" , { utf8_char } ;
+
+sign             = "+" | "-" ;
+exponent_marker  = "e" | "E" ;
+exponent         = exponent_marker , [ sign ] , digit , { digit } ;
+
+decimal_lit      = [ sign ] , digit , { digit } , [ "B" | "S" | "I" | "L" ] ;
+float_lit        = [ sign ] , ( 
+                      ( digit , { digit } , "." , digit , { digit } , [ exponent ] ) | 
+                      ( "." , digit , { digit } , [ exponent ] ) | 
+                      ( digit , { digit } , exponent ) 
+                    ) , [ "F" | "D" ] ;
+
+hex_lit          = [ sign ] , "0x" , hex_digit , { hex_digit } ;
+octal_lit        = [ sign ] , "0c" , octal_digit , { octal_digit } ;
+binary_lit       = [ sign ] , "0b" , binary_digit , { binary_digit } ;
+
+literal          = decimal_lit | float_lit | hex_lit | octal_lit | binary_lit | string_lit | char_lit ;
+literal_cast     = ("B" | "S" | "I" | "L" | "F" | "D"), ws_optional, "(", ws_optional, literal, ws_optional, ")" ;
+literal_decl     = literal | literal_cast ;
+
+(* Operands *)
+register         = "R" , alpha_num_char , alpha_num_char ;
+addrm_ind        = "[", ws_optional, literal_decl, ws_optional, "]" ;
+addrm_ptr        = "[", ws_optional, register, ws_optional, "]" ;
+addrm_idx        = "[", ws_optional, register, ws_optional, "+", ws_optional, literal_decl, ws_optional, "]";
+addrm_sca        = "[", ws_optional, register, ws_optional, "+", register, ws_optional, "*", ws_optional, literal_decl, ws_optional, "]";
+addrm_dis        = "[", ws_optional, register, ws_optional, "+", register, ws_optional, "*", ws_optional, literal_decl, ws_optional, "+", ws_optional, literal_decl, ws_optional, "]";
+addr_modes       = addrm_ind | addrm_ptr | addrm_idx | addrm_sca | addrm_dis ;
+operand          = register | identifier | literal_decl | addr_modes ;
+
+(* Generalized Instructions *)
+opcode           = letter , { alpha_num_char } ;
+operand_list     = operand , { "," , ws_optional , operand } ;
+instruction      = opcode , [ whitespace , operand_list ] ;
+
+(* Added Preprocessor, Sections, and Metadata Syntaxes *)
+include_decl     = "include", whitespace, string_lit ;
+annotation_oper  = identifier, [ ws_optional, "=", ws_optional, literal_decl ] ;
+annotation_ops   = annotation_oper , { ws_optional, "," , ws_optional , annotation_oper } ;
+annotation_args  = "(", ws_optional, annotation_ops, ws_optional, ")" ;
+annotation       = "@", identifier, [ annotation_args ] ;
+section_decl     = "section", whitespace, ".", identifier ;
+
+(* Line Structure *)
+label            = identifier, ":" ;
+line_content     = include_decl | section_decl | ( [ annotation, whitespace ], [ label, ws_optional ], [ instruction ] ) ;
+line             = ws_optional, [ line_content ], ws_optional , [ comment ] , newline ;
+line_last        = ws_optional, [ line_content ], ws_optional , [ comment ] ;
+program          = { line }, [ line_last ] ;
diff --git a/samples/factorial.spasm b/samples/factorial.spasm
new file mode 100644
index 0000000..be12b4d
--- /dev/null
+++ b/samples/factorial.spasm
@@ -0,0 +1,11 @@
+@asm
+.data
+.code
+MOV RA, 1
+MOV RB, 8 ; Input number
+:loop_start
+MUL RA, RB
+NOT RB ; RB != 0? Updates equal flag
+DEC RB ; RB -= 1
+JEQ loop_start ; If equal flag, goto loop_start
+; End program, result in RA
diff --git a/spider-compiler.code-workspace b/spider-compiler.code-workspace
new file mode 100644
index 0000000..07b9241
--- /dev/null
+++ b/spider-compiler.code-workspace
@@ -0,0 +1,21 @@
+{
+	"folders": [
+		{
+			"path": "."
+		}
+	],
+	"settings": {
+		"gitlens.remotes": [
+			{
+				"domain": "git.sintekanalytics.com",
+				"type": "Gitea",
+				"name": "Sintek Analytics' Git",
+				"protocol": "https",
+			}
+		],
+		"C_Cpp.default.includePath": [
+			"./src"
+		],
+		"terminal.integrated.defaultProfile.windows": "MSYS2 UCRT"
+	}
+}
\ No newline at end of file
diff --git a/spider/compiler/assembly/AssemblyParser.hpp b/spider/compiler/assembly/AssemblyParser.hpp
new file mode 100644
index 0000000..09fd1e0
--- /dev/null
+++ b/spider/compiler/assembly/AssemblyParser.hpp
@@ -0,0 +1,812 @@
+#pragma once
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <stdexcept>
+
+class AssemblyParser {
+private:
+    std::string src;
+    size_t pos = 0;
+
+    std::string peek_str(size_t len) {
+        // Up to `len` characters starting at pos; the tail of src when fewer remain.
+        return (pos + len <= src.length()) ? src.substr(pos, len) : src.substr(pos);
+    }
+
+    char peek() { if (pos >= src.length()) return '\0'; return src[pos]; }  // current character, or '\0' once the input is exhausted
+
+    void match_char(char expected) {
+        // Guard clause: fail unless the current character equals `expected`.
+        if (peek() != expected) throw std::runtime_error("Unexpected token matching character");
+        pos++;
+    }
+
+    void match_string(std::string expected) {
+        // Compare the next expected.length() characters; advance only on an exact match.
+        if (peek_str(expected.length()) != expected) throw std::runtime_error("Unexpected token matching string: " + expected);
+        pos += expected.length();
+    }
+
+    bool isUTF8Alpha() { return isalpha(peek()); }
+    bool isWhithespaceCharNotCrLf() { const char c = peek(); return c == ' ' || c == '\t'; }  // blank = space or horizontal tab
+    bool isUTF8CharNotCrLf() { const char c = peek(); return !(c == '\r' || c == '\n' || c == '\0'); }  // rejects CR, LF and the '\0' that peek() yields at end of input
+    bool isUTF8CharLitCont() { return pos < src.length() && peek() != '\'' && peek() != '\\'; }  // false at end of input, so a caller cannot advance pos past src
+    bool isUTF8StringLitCont() { return pos < src.length() && peek() != '"' && peek() != '\\'; }  // false at end of input, so a caller cannot advance pos past src
+
+public:
+    AssemblyParser(std::string input) : src(input) {}
+
+    void parse() {
+        parse_program();
+        if (!(pos >= src.length())) throw std::runtime_error("Trailing characters left unparsed.");  // the whole input must be consumed
+        std::cout << "Assembly source compiled cleanly!" << '\n' << std::flush;
+    }
+
+    void parse_letter() {
+        // letter: one character accepted by isUTF8Alpha(), consumed on success.
+        if (!isUTF8Alpha()) throw std::runtime_error("Failed validation for isUTF8Alpha");
+        pos++;
+    }
+
+    /// Parses one decimal digit: digit = "0" | "1" | ... | "9".
+    ///
+    /// Every branch of the previous if/else-if chain was guarded by a literal
+    /// `true`, so only the first alternative ('0') was ever attempted and the
+    /// digits '1'..'9' could never be accepted. The alternative is now chosen
+    /// from the current character.
+    ///
+    /// Consumes exactly one character on success and nothing on failure.
+    ///
+    /// @throws std::runtime_error when the current character is not '0'..'9'
+    ///         (peek() yields '\0' at end of input, which is rejected too).
+    void parse_digit() {
+        switch (peek()) {
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+                pos++;
+                break;
+            default:
+                // Same exception type and message that match_char() uses, so a
+                // mismatch is reported exactly as before.
+                throw std::runtime_error("Unexpected token matching character");
+        }
+    }
+
+    /// alpha_num_char = letter | digit. The old chain of `true` guards always took
+    /// the letter branch; now a letter is tried only when isUTF8Alpha() accepts it.
+    /// @throws std::runtime_error (from the chosen sub-rule) when neither matches.
+    void parse_alpha_num_char() {
+        if (isUTF8Alpha()) parse_letter();
+        else parse_digit();
+    }
+
+    /// Parses one hexadecimal digit: hex_digit = digit | "A".."F" | "a".."f".
+    ///
+    /// Every branch of the previous if/else-if chain was guarded by a literal
+    /// `true`, so control always went to parse_digit() and the letter
+    /// alternatives were unreachable. The alternative is now chosen from the
+    /// current character; decimal digits are accepted here directly rather
+    /// than by delegating, so this rule does not depend on how parse_digit()
+    /// is implemented.
+    ///
+    /// Consumes exactly one character on success and nothing on failure.
+    ///
+    /// @throws std::runtime_error when the current character is not a
+    ///         hexadecimal digit (including end of input, where peek() is '\0').
+    void parse_hex_digit() {
+        switch (peek()) {
+            // decimal digits
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+            // upper-case letters
+            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+            // lower-case letters
+            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                pos++;
+                break;
+            default:
+                // Same exception type and message that match_char() uses.
+                throw std::runtime_error("Unexpected token matching character");
+        }
+    }
+
+    /// Parses one octal digit: octal_digit = "0" | "1" | ... | "7".
+    ///
+    /// Every branch of the previous if/else-if chain was guarded by a literal
+    /// `true`, so only '0' was ever attempted and '1'..'7' were unreachable.
+    /// The alternative is now chosen from the current character.
+    ///
+    /// Consumes exactly one character on success and nothing on failure.
+    /// @throws std::runtime_error when the current character is not '0'..'7'.
+    void parse_octal_digit() {
+        switch (peek()) {
+            case '0': case '1': case '2': case '3':
+            case '4': case '5': case '6': case '7':
+                pos++;
+                break;
+            default:
+                // Same exception type and message that match_char() uses.
+                throw std::runtime_error("Unexpected token matching character");
+        }
+    }
+
+    /// binary_digit = "0" | "1". The old chain of `true` guards only ever tried '0'.
+    /// @throws std::runtime_error when the current character is neither '0' nor '1'.
+    void parse_binary_digit() {
+        const char c = peek();
+        if (c != '0' && c != '1') throw std::runtime_error("Unexpected token matching character");
+        pos++;
+    }
+
+    void parse_ws_char() {
+        // ws_char: one character accepted by isWhithespaceCharNotCrLf(), consumed on success.
+        if (!isWhithespaceCharNotCrLf()) throw std::runtime_error("Failed validation for isWhithespaceCharNotCrLf");
+        pos++;
+    }
+
+    /// ws_optional = { ws_char } : skips zero or more spaces/tabs, never throws.
+    ///
+    /// The previous `while (true)` loop could only be left through the exception
+    /// parse_ws_char() throws at the first non-blank character, so this "optional"
+    /// rule always failed. The loop is now guarded by the rule's own predicate.
+    void parse_ws_optional() {
+        while (isWhithespaceCharNotCrLf()) {
+            pos++;
+        }
+    }
+
+    /// whitespace = ws_char , { ws_char } : one or more spaces/tabs.
+    ///
+    /// The previous `while (true)` loop could only be left through the exception
+    /// parse_ws_char() throws at the first non-blank character, so the rule failed
+    /// even after consuming valid whitespace. The repetition is now guarded.
+    ///
+    /// @throws std::runtime_error when the current character is not a space or tab.
+    void parse_whitespace() {
+        parse_ws_char();  // the mandatory first blank; throws when absent
+        while (isWhithespaceCharNotCrLf()) pos++;
+    }
+
+    /// newline = "\r" | "\n" | "\r\n". The old chain of `true` guards only ever tried
+    /// "\r", so a bare "\n" was rejected and "\r\n" left its "\n" unconsumed.
+    /// @throws std::runtime_error when the input is not positioned at a line break.
+    void parse_newline() {
+        if (peek() == '\n') { pos++; return; }
+        // Both remaining alternatives start with '\r'; anything else is an error.
+        match_char('\r');
+        if (peek() == '\n') pos++;  // treat "\r\n" as one line break
+    }
+
+    void parse_utf8_char() {
+        // utf8_char: one character accepted by isUTF8CharNotCrLf(), consumed on success.
+        if (!isUTF8CharNotCrLf()) throw std::runtime_error("Failed validation for isUTF8CharNotCrLf");
+        pos++;
+    }
+
+    void parse_char_escape() {
+        // char_escape = "\\" , utf8_char : a backslash, then whatever single
+        // character parse_utf8_char() accepts.
+        match_char('\\');
+        parse_utf8_char();
+    }
+
+    /// char_content = char_escape | ? isUTF8CharLitCont ? . The old chain of `true`
+    /// guards always tried char_escape, so any unescaped character was rejected.
+    /// @throws std::runtime_error when neither alternative matches (or at end of input).
+    void parse_char_content() {
+        if (peek() == '\\') { parse_char_escape(); return; }
+        if (pos < src.length() && isUTF8CharLitCont()) { pos++; } else { throw std::runtime_error("Failed validation for isUTF8CharLitCont"); }
+    }
+
+    void parse_char_lit() {
+        // char_lit = "'" , char_content , "'"
+        const char quote = '\'';
+        match_char(quote);
+        parse_char_content();
+        match_char(quote);
+    }
+
+    /// string_char = char_escape | ? isUTF8StringLitCont ? . The old chain of `true`
+    /// guards always tried char_escape, so any unescaped character was rejected.
+    /// @throws std::runtime_error when neither alternative matches (or at end of input).
+    void parse_string_char() {
+        if (peek() == '\\') { parse_char_escape(); return; }
+        if (pos < src.length() && isUTF8StringLitCont()) { pos++; } else { throw std::runtime_error("Failed validation for isUTF8StringLitCont"); }
+    }
+
+    /// string_lit = '"' , { string_char } , '"'
+    ///
+    /// The previous `while (true)` loop could only be left through an exception
+    /// from parse_string_char(), so the closing quote was never reached. The loop
+    /// now stops at the closing quote or at end of input.
+    ///
+    /// @throws std::runtime_error on a missing quote or an invalid string_char.
+    void parse_string_lit() {
+        match_char('"');
+        while (pos < src.length() && peek() != '"') parse_string_char();
+        match_char('"');  // at end of input this reports the unterminated literal
+    }
+
+    /// identifier = ( letter | "_" ) , { alpha_num_char | "_" }
+    ///
+    /// Two defects of the generated scaffold are fixed here: the `true`-guarded
+    /// alternatives made "_" unreachable in both positions, and the `while (true)`
+    /// loop could only be left through an exception, so no identifier was ever
+    /// accepted. Both choices are now driven by the current character.
+    ///
+    /// @throws std::runtime_error when the first character is neither a letter nor "_".
+    void parse_identifier() {
+        if (isUTF8Alpha()) parse_letter();
+        else match_char('_');
+        // Consume the tail directly: each accepted character is exactly one that
+        // alpha_num_char (letter or decimal digit) or "_" would match.
+        while (isUTF8Alpha() || peek() == '_' || (peek() >= '0' && peek() <= '9')) {
+            pos++;
+        }
+    }
+
+    /// comment = ";" , { utf8_char } : a ';' and everything up to the line end.
+    ///
+    /// The previous `while (true)` loop could only be left through the exception
+    /// parse_utf8_char() throws at the line break or end of input, so every
+    /// comment was reported as an error. The loop is now guarded by the predicate.
+    ///
+    /// @throws std::runtime_error when the current character is not ';'.
+    void parse_comment() {
+        match_char(';');
+        while (isUTF8CharNotCrLf()) pos++;
+    }
+
+    /// sign = "+" | "-". The old chain of `true` guards only ever tried "+", so a
+    /// leading "-" was always rejected.
+    /// @throws std::runtime_error when the current character is neither "+" nor "-".
+    void parse_sign() {
+        if (peek() == '-') pos++;
+        else match_char('+');
+    }
+
+    /// exponent_marker = "e" | "E". The old chain of `true` guards only ever tried
+    /// "e", so an upper-case marker was always rejected.
+    /// @throws std::runtime_error when the current character is neither "e" nor "E".
+    void parse_exponent_marker() {
+        if (peek() == 'E') pos++;
+        else match_char('e');
+    }
+
+    /// exponent = exponent_marker , [ sign ] , digit , { digit }
+    ///
+    /// Fixes two defects of the generated scaffold: the "optional" sign was parsed
+    /// unconditionally (so an unsigned exponent was rejected), and the digit loop
+    /// was `while (true)` and could only be left through an exception. Both are
+    /// now guarded by a one-character lookahead.
+    ///
+    /// @throws std::runtime_error when the marker or the first digit is missing.
+    void parse_exponent() {
+        parse_exponent_marker();
+        if (peek() == '+' || peek() == '-') {
+            pos++;  // optional sign
+        }
+        parse_digit();  // at least one digit is required
+        while (peek() >= '0' && peek() <= '9') {
+            pos++;
+        }
+    }
+
+    /// decimal_lit = [ sign ] , digit , { digit } , [ "B" | "S" | "I" | "L" ]
+    ///
+    /// Fixes three defects of the generated scaffold:
+    ///  - both "optional" parts were parsed unconditionally, so an unsigned or
+    ///    unsuffixed literal was rejected;
+    ///  - the suffix alternatives were all guarded by `true`, so only "B" was
+    ///    ever attempted;
+    ///  - the digit loop was `while (true)` and could only be left through an
+    ///    exception.
+    /// Every choice is now made from a one-character lookahead.
+    ///
+    /// @throws std::runtime_error when no digit follows the optional sign.
+    void parse_decimal_lit() {
+        if (peek() == '+' || peek() == '-') {
+            pos++;  // optional sign
+        }
+        parse_digit();  // at least one digit is required
+        // Remaining digits, if any.
+        while (peek() >= '0' && peek() <= '9') {
+            pos++;
+        }
+        switch (peek()) {  // optional suffix letter
+            case 'B': case 'S': case 'I': case 'L':
+                pos++;
+                break;
+            default:
+                break;  // no suffix: nothing to consume
+        }
+    }
+
+    /// Parses a floating-point literal.
+    ///
+    /// Grammar:
+    ///   float_lit = [ sign ] , (
+    ///                 ( digit , { digit } , "." , digit , { digit } , [ exponent ] )
+    ///               | ( "." , digit , { digit } , [ exponent ] )
+    ///               | ( digit , { digit } , exponent )
+    ///               ) , [ "F" | "D" ] ;
+    ///
+    /// Defects of the generated scaffold that are fixed here:
+    ///  - the three forms were chained as `if (true) ... else if (true)`, so only
+    ///    the first one was ever attempted and the other two were unreachable;
+    ///  - the optional sign, the optional exponent and the optional suffix were
+    ///    parsed unconditionally, which made each of them mandatory;
+    ///  - the "F" | "D" alternatives were guarded by `true`, so "D" was unreachable;
+    ///  - every digit loop was `while (true)` and ended only by throwing.
+    ///
+    /// The forms are now told apart with a one-character lookahead: a leading "."
+    /// selects the second form; otherwise the integer digits are consumed and the
+    /// next character decides between the first form (".") and the third form
+    /// (an exponent, which is mandatory there).
+    ///
+    /// Mandatory digits and exponents still go through parse_digit() and
+    /// parse_exponent(), so their exceptions report a malformed literal. An "e" or
+    /// "E" right after the fraction digits is always taken as an exponent start.
+    ///
+    /// @throws std::runtime_error when the input at the current position does not
+    ///         have one of the three forms above.
+    void parse_float_lit() {
+        // [ sign ]
+        if (peek() == '+' || peek() == '-') {
+            pos++;
+        }
+        // Choose the form from the first character after the sign.
+        if (peek() == '.') {
+            // Form 2: "." digit { digit } [ exponent ]
+            pos++;
+            parse_digit();
+            while (peek() >= '0' && peek() <= '9') {
+                pos++;
+            }
+            if (peek() == 'e' || peek() == 'E') {
+                parse_exponent();
+            }
+        } else {
+            // Forms 1 and 3 both start with digit { digit }.
+            // Which one it is only becomes clear after those digits.
+            parse_digit();
+            while (peek() >= '0' && peek() <= '9') {
+                pos++;
+            }
+            if (peek() == '.') {
+                // Form 1 continues with "." digit { digit } [ exponent ]
+                pos++;
+                parse_digit();
+                while (peek() >= '0' && peek() <= '9') {
+                    pos++;
+                }
+                if (peek() == 'e' || peek() == 'E') {
+                    parse_exponent();
+                }
+            } else {
+                // Form 3: the exponent is required, so let it throw when absent.
+                parse_exponent();
+            }
+        }
+        // [ "F" | "D" ]
+        if (peek() == 'F' || peek() == 'D') {
+            pos++;
+        }
+    }
+
+    void parse_hex_lit() { // Shape: [sign] "0x" hex_digit {hex_digit}
+        if (/* option 1 */ true) {
+            // Optional block: the lookahead is still the placeholder 'true', so parse_sign() always runs
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_sign();
+                }
+            }
+            match_string("0x");
+            parse_hex_digit();
+            // Repeat block: the lookahead is still the placeholder 'true', so the loop has no exit of its own
+            while (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_hex_digit();
+                }
+            }
+        }
+    }
+
+    void parse_octal_lit() { // Shape: [sign] "0c" octal_digit {octal_digit}
+        if (/* option 1 */ true) {
+            // Optional block: the lookahead is still the placeholder 'true', so parse_sign() always runs
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_sign();
+                }
+            }
+            match_string("0c"); // NOTE(review): the octal prefix is "0c" here, not "0o" — confirm this is intended
+            parse_octal_digit();
+            // Repeat block: the lookahead is still the placeholder 'true', so the loop has no exit of its own
+            while (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_octal_digit();
+                }
+            }
+        }
+    }
+
+    void parse_binary_lit() { // Shape: [sign] "0b" binary_digit {binary_digit}
+        if (/* option 1 */ true) {
+            // Optional block: the lookahead is still the placeholder 'true', so parse_sign() always runs
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_sign();
+                }
+            }
+            match_string("0b");
+            parse_binary_digit();
+            // Repeat block: the lookahead is still the placeholder 'true', so the loop has no exit of its own
+            while (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_binary_digit();
+                }
+            }
+        }
+    }
+
+    void parse_literal() { // Shape: decimal_lit | float_lit | hex_lit | octal_lit | binary_lit | string_lit | char_lit
+        if (/* option 1 */ true) { // placeholder 'true': always taken, so options 2-7 are unreachable for now
+            parse_decimal_lit();
+        } else if (/* option 2 */ true) {
+            parse_float_lit();
+        } else if (/* option 3 */ true) {
+            parse_hex_lit();
+        } else if (/* option 4 */ true) {
+            parse_octal_lit();
+        } else if (/* option 5 */ true) {
+            parse_binary_lit();
+        } else if (/* option 6 */ true) {
+            parse_string_lit();
+        } else if (/* option 7 */ true) {
+            parse_char_lit();
+        }
+    }
+
+    void parse_literal_cast() { // Shape: ('B'|'S'|'I'|'L'|'F'|'D') [ws] '(' [ws] literal [ws] ')'
+        if (/* option 1 */ true) {
+            if (/* option 1 */ true) { // placeholder 'true': 'B' is always chosen, the other letters are unreachable for now
+                match_char('B');
+            } else if (/* option 2 */ true) {
+                match_char('S');
+            } else if (/* option 3 */ true) {
+                match_char('I');
+            } else if (/* option 4 */ true) {
+                match_char('L');
+            } else if (/* option 5 */ true) {
+                match_char('F');
+            } else if (/* option 6 */ true) {
+                match_char('D');
+            }
+            parse_ws_optional();
+            match_char('(');
+            parse_ws_optional();
+            parse_literal();
+            parse_ws_optional();
+            match_char(')');
+        }
+    }
+
+    void parse_literal_decl() { // Shape: literal | literal_cast
+        if (/* option 1 */ true) { // placeholder 'true': always taken, so the cast form is unreachable for now
+            parse_literal();
+        } else if (/* option 2 */ true) {
+            parse_literal_cast();
+        }
+    }
+
+    void parse_register() { // Shape: 'R' alpha_num_char alpha_num_char
+        if (/* option 1 */ true) {
+            match_char('R');
+            parse_alpha_num_char(); // exactly two characters follow the 'R'
+            parse_alpha_num_char();
+        }
+    }
+
+    void parse_addrm_ind() { // Shape: '[' [ws] literal_decl [ws] ']'
+        if (/* option 1 */ true) {
+            match_char('[');
+            parse_ws_optional();
+            parse_literal_decl();
+            parse_ws_optional();
+            match_char(']');
+        }
+    }
+
+    void parse_addrm_ptr() { // Shape: '[' [ws] register [ws] ']'
+        if (/* option 1 */ true) {
+            match_char('[');
+            parse_ws_optional();
+            parse_register();
+            parse_ws_optional();
+            match_char(']');
+        }
+    }
+
+    void parse_addrm_idx() { // Shape: '[' [ws] register [ws] '+' [ws] literal_decl [ws] ']'
+        if (/* option 1 */ true) {
+            match_char('[');
+            parse_ws_optional();
+            parse_register();
+            parse_ws_optional();
+            match_char('+');
+            parse_ws_optional();
+            parse_literal_decl();
+            parse_ws_optional();
+            match_char(']');
+        }
+    }
+
+    void parse_addrm_sca() { // Shape: '[' [ws] register [ws] '+' register [ws] '*' [ws] literal_decl [ws] ']'
+        if (/* option 1 */ true) {
+            match_char('[');
+            parse_ws_optional();
+            parse_register();
+            parse_ws_optional();
+            match_char('+'); // NOTE(review): no parse_ws_optional() after this '+', unlike parse_addrm_idx — confirm against the grammar
+            parse_register();
+            parse_ws_optional();
+            match_char('*');
+            parse_ws_optional();
+            parse_literal_decl();
+            parse_ws_optional();
+            match_char(']');
+        }
+    }
+
+    void parse_addrm_dis() { // Shape: '[' [ws] register [ws] '+' register [ws] '*' [ws] literal_decl [ws] '+' [ws] literal_decl [ws] ']'
+        if (/* option 1 */ true) {
+            match_char('[');
+            parse_ws_optional();
+            parse_register();
+            parse_ws_optional();
+            match_char('+'); // NOTE(review): no parse_ws_optional() after this '+', unlike the second '+' below — confirm against the grammar
+            parse_register();
+            parse_ws_optional();
+            match_char('*');
+            parse_ws_optional();
+            parse_literal_decl();
+            parse_ws_optional();
+            match_char('+');
+            parse_ws_optional();
+            parse_literal_decl();
+            parse_ws_optional();
+            match_char(']');
+        }
+    }
+
+    void parse_addr_modes() { // Shape: addrm_ind | addrm_ptr | addrm_idx | addrm_sca | addrm_dis (each one starts by matching '[')
+        if (/* option 1 */ true) { // placeholder 'true': always taken, so options 2-5 are unreachable for now
+            parse_addrm_ind();
+        } else if (/* option 2 */ true) {
+            parse_addrm_ptr();
+        } else if (/* option 3 */ true) {
+            parse_addrm_idx();
+        } else if (/* option 4 */ true) {
+            parse_addrm_sca();
+        } else if (/* option 5 */ true) {
+            parse_addrm_dis();
+        }
+    }
+
+    void parse_operand() { // Shape: register | identifier | literal_decl | addr_modes
+        if (/* option 1 */ true) { // placeholder 'true': always taken, so options 2-4 are unreachable for now
+            parse_register();
+        } else if (/* option 2 */ true) {
+            parse_identifier();
+        } else if (/* option 3 */ true) {
+            parse_literal_decl();
+        } else if (/* option 4 */ true) {
+            parse_addr_modes();
+        }
+    }
+
+    void parse_opcode() { // Shape: letter {alpha_num_char}
+        if (/* option 1 */ true) {
+            parse_letter();
+            // Repeat block: the lookahead is still the placeholder 'true', so the loop has no exit of its own
+            while (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_alpha_num_char();
+                }
+            }
+        }
+    }
+
+    void parse_operand_list() { // Shape: operand { ',' [ws] operand }
+        if (/* option 1 */ true) {
+            parse_operand();
+            // Repeat block: the lookahead is still the placeholder 'true', so the loop has no exit of its own
+            while (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    match_char(','); // NOTE(review): no optional whitespace before ',', unlike parse_annotation_ops — confirm
+                    parse_ws_optional();
+                    parse_operand();
+                }
+            }
+        }
+    }
+
+    void parse_instruction() { // Shape: opcode [ whitespace operand_list ]
+        if (/* option 1 */ true) {
+            parse_opcode();
+            // Optional block: the lookahead is still the placeholder 'true', so the operand list is always parsed
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_whitespace();
+                    parse_operand_list();
+                }
+            }
+        }
+    }
+
+    void parse_include_decl() { // Shape: "include" whitespace string_lit
+        if (/* option 1 */ true) {
+            match_string("include");
+            parse_whitespace();
+            parse_string_lit(); // the string literal that follows the keyword
+        }
+    }
+
+    void parse_annotation_oper() { // Shape: identifier [ [ws] '=' [ws] literal_decl ]
+        if (/* option 1 */ true) {
+            parse_identifier();
+            // Optional block: the lookahead is still the placeholder 'true', so the '=' part is always parsed
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_ws_optional();
+                    match_char('=');
+                    parse_ws_optional();
+                    parse_literal_decl();
+                }
+            }
+        }
+    }
+
+    void parse_annotation_ops() { // Shape: annotation_oper { [ws] ',' [ws] annotation_oper }
+        if (/* option 1 */ true) {
+            parse_annotation_oper();
+            // Repeat block: the lookahead is still the placeholder 'true', so the loop has no exit of its own
+            while (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_ws_optional();
+                    match_char(',');
+                    parse_ws_optional();
+                    parse_annotation_oper();
+                }
+            }
+        }
+    }
+
+    void parse_annotation_args() { // Shape: '(' [ws] annotation_ops [ws] ')'
+        if (/* option 1 */ true) {
+            match_char('(');
+            parse_ws_optional();
+            parse_annotation_ops();
+            parse_ws_optional();
+            match_char(')');
+        }
+    }
+
+    void parse_annotation() { // Shape: '@' identifier [ annotation_args ]
+        if (/* option 1 */ true) {
+            match_char('@');
+            parse_identifier();
+            // Optional block: the lookahead is still the placeholder 'true', so the argument list is always parsed
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_annotation_args();
+                }
+            }
+        }
+    }
+
+    void parse_section_decl() { // Shape: "section" whitespace '.' identifier
+        if (/* option 1 */ true) {
+            match_string("section");
+            parse_whitespace();
+            match_char('.'); // the identifier follows the dot directly, with no whitespace call in between
+            parse_identifier();
+        }
+    }
+
+    void parse_label() { // Shape: identifier ':'
+        if (/* option 1 */ true) {
+            parse_identifier();
+            match_char(':'); // no whitespace call between the identifier and the colon
+        }
+    }
+
+    void parse_line_content() { // Shape: include_decl | section_decl | ( [annotation whitespace] [label [ws]] [instruction] )
+        if (/* option 1 */ true) { // placeholder 'true': always taken, so options 2 and 3 are unreachable for now
+            parse_include_decl();
+        } else if (/* option 2 */ true) {
+            parse_section_decl();
+        } else if (/* option 3 */ true) {
+            if (/* option 1 */ true) {
+                // Optional block (annotation): the lookahead is still the placeholder 'true'
+                if (/* lookahead check */ true) {
+                    if (/* option 1 */ true) {
+                        parse_annotation();
+                        parse_whitespace();
+                    }
+                }
+                // Optional block (label): the lookahead is still the placeholder 'true'
+                if (/* lookahead check */ true) {
+                    if (/* option 1 */ true) {
+                        parse_label();
+                        parse_ws_optional();
+                    }
+                }
+                // Optional block (instruction): the lookahead is still the placeholder 'true'
+                if (/* lookahead check */ true) {
+                    if (/* option 1 */ true) {
+                        parse_instruction();
+                    }
+                }
+            }
+        }
+    }
+
+    void parse_line() { // Shape: [ws] [line_content] [ws] [comment] newline
+        if (/* option 1 */ true) {
+            parse_ws_optional();
+            // Optional block (line content): the lookahead is still the placeholder 'true', so it always runs
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_line_content();
+                }
+            }
+            parse_ws_optional();
+            // Optional block (comment): the lookahead is still the placeholder 'true', so it always runs
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_comment();
+                }
+            }
+            parse_newline();
+        }
+    }
+
+    void parse_line_last() { // Shape: [ws] [line_content] [ws] [comment] — same as parse_line but without the final newline
+        if (/* option 1 */ true) {
+            parse_ws_optional();
+            // Optional block (line content): the lookahead is still the placeholder 'true', so it always runs
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_line_content();
+                }
+            }
+            parse_ws_optional();
+            // Optional block (comment): the lookahead is still the placeholder 'true', so it always runs
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_comment();
+                }
+            }
+        }
+    }
+
+    void parse_program() { // Shape: {line} [line_last]
+        if (/* option 1 */ true) {
+            // Repeat block: the lookahead is still the placeholder 'true', so the loop has no exit of its own
+            while (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_line();
+                }
+            }
+            // Optional block: placeholder 'true' lookahead; only reached if the loop above is ever left
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_line_last();
+                }
+            }
+        }
+    }
+};
diff --git a/src/spider/compiler/Compiler.cpp b/src/spider/compiler/Compiler.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/src/spider/compiler/Compiler.hpp b/src/spider/compiler/Compiler.hpp
new file mode 100644
index 0000000..740b5cf
--- /dev/null
+++ b/src/spider/compiler/Compiler.hpp
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <spider/compiler/common.hpp>
+
+namespace spider {
+    // Forward declarations only; the classes themselves are declared under spider/compiler/tokens/.
+    class Token;
+    class RootToken;
+
+}
diff --git a/src/spider/compiler/assembler/Assembler.cpp b/src/spider/compiler/assembler/Assembler.cpp
new file mode 100644
index 0000000..7fa45fc
--- /dev/null
+++ b/src/spider/compiler/assembler/Assembler.cpp
@@ -0,0 +1,35 @@
+#include "Assembler.hpp"
+
+namespace spider {
+
+    Assembler::Assembler() = default; // nothing to set up: both containers start empty
+
+    Assembler::~Assembler() = default; // members clean up after themselves
+
+    Assembler::Error Assembler::loadFile(const fs::path& path) {
+        // fs::canonical throws on a missing path, so test for existence before resolving it.
+        if(!fs::exists(path)) return Error::FILE_NOT_FOUND;
+        const fs::path abs_path = fs::canonical(path);
+
+        // Refuse a file that is already on the load stack (it would include itself).
+        if(fstack.contains(abs_path)) return Error::FILE_RECURSIVE_LOAD;
+        auto ir = fstack.insert(abs_path);
+
+        // Push a level for the file; the reader is built as the concrete FileTextReader.
+        levels.emplace_back(Level {
+            .reader = std::make_unique<FileTextReader>(abs_path.string()),
+            .source = abs_path.string(),
+        });
+        parseCurrentLevel();
+
+        // Finished with this file: take it off the load stack again.
+        fstack.erase(ir.first);
+        return Error::SUCCESS;
+    }
+
+    void Assembler::parseCurrentLevel() {
+        auto& lvl = levels.back();
+        // Stub: the most recent level is fetched, but nothing is parsed from it yet.
+    }
+
+}
diff --git a/src/spider/compiler/assembler/Assembler.hpp b/src/spider/compiler/assembler/Assembler.hpp
new file mode 100644
index 0000000..25ef9c0
--- /dev/null
+++ b/src/spider/compiler/assembler/Assembler.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <spider/compiler/common.hpp>
+
+#include <spider/compiler/text/TextReader.hpp>
+
+#include <spider/compiler/tokens/RootToken.hpp>
+
+namespace spider {
+
+    /**
+     * The spider assembler, capable of
+     * converting text into bytecode.
+     */
+    class Assembler {
+    public:
+        enum class Error { // result codes returned by loadFile()
+            SUCCESS,
+            FILE_NOT_FOUND, FILE_RECURSIVE_LOAD,
+        };
+        struct Level { // one entry per file pushed by loadFile()
+            uptr<TextReader> reader; // reader opened on the file
+            RootToken root;
+            std::string source; // canonical path of the file, as a string
+        };
+
+    public:
+        // fstack: canonical paths of the files currently being loaded, used to detect recursive loads.
+        set<fs::path> fstack;
+        deque<Level> levels; // loadFile() appends one Level per call
+
+    public:
+
+        Assembler();
+
+        ~Assembler();
+
+    public:
+
+        /**
+         * Loads a file as a new level and parses it. A path that is already in
+         * fstack yields FILE_RECURSIVE_LOAD; FILE_NOT_FOUND is the code for a missing file.
+         */
+        Error loadFile(const fs::path& path);
+
+    private:
+        // Works on levels.back(); the implementation is currently a stub.
+        void parseCurrentLevel();
+
+    };
+
+}
diff --git a/src/spider/compiler/assembler/Disassembler.hpp b/src/spider/compiler/assembler/Disassembler.hpp
new file mode 100644
index 0000000..d98c1f1
--- /dev/null
+++ b/src/spider/compiler/assembler/Disassembler.hpp
@@ -0,0 +1,11 @@
+#pragma once
+
+namespace spider {
+
+    /**
+     * A disassembler, intended to convert bytecode into readable text.
+     * Currently an empty placeholder with no members.
+     */
+    class Disassembler {};
+
+}
diff --git a/src/spider/compiler/assembler/Dissasembler.cpp b/src/spider/compiler/assembler/Dissasembler.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/src/spider/compiler/common.hpp b/src/spider/compiler/common.hpp
new file mode 100644
index 0000000..2674162
--- /dev/null
+++ b/src/spider/compiler/common.hpp
@@ -0,0 +1,55 @@
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include <deque>
+#include <map>
+#include <optional>
+#include <string>
+#include <memory>
+#include <filesystem>
+#include <set>
+
+namespace spider {
+
+    // Fixed-width integer types
+    using u8 = std::uint8_t;
+    using u16 = std::uint16_t;
+    using u32 = std::uint32_t;
+    using u64 = std::uint64_t;
+
+    using i8 = std::int8_t;
+    using i16 = std::int16_t;
+    using i32 = std::int32_t;
+    using i64 = std::int64_t;
+
+    using f32 = float; // TODO: SPIDER_EMULATE_FLOAT will control this
+    using f64 = double;
+
+    // TODO: Check if we're on C++23, there is already stdfloat
+    static_assert(sizeof(f32) == 4, "The f32 type must be exactly 4 bytes.");
+    static_assert(sizeof(f64) == 8, "The f64 type must be exactly 8 bytes.");
+
+    // Utility types (note: isize aliases std::size_t, so it is unsigned despite the name)
+    using isize = std::size_t;
+
+    // Utility imports: standard containers usable without the std:: prefix inside spider
+    using std::vector;
+    using std::deque;
+    using std::map;
+    using std::optional;
+    using std::set;
+    // Smart-pointer shorthands: ptr is shared ownership, uptr is unique ownership.
+    template<typename T> using ptr = std::shared_ptr<T>;
+    template<typename T> using uptr = std::unique_ptr<T>;
+
+    namespace fs = std::filesystem;
+
+    // Line/column position in a text source; both fields default to 1.
+    struct pos {
+        isize line;
+        isize col;
+        pos(isize line = 1, isize col = 1) : line{line}, col{col} {}
+    };
+
+}
diff --git a/src/spider/compiler/text/TextReader.cpp b/src/spider/compiler/text/TextReader.cpp
new file mode 100644
index 0000000..dc3a1c9
--- /dev/null
+++ b/src/spider/compiler/text/TextReader.cpp
@@ -0,0 +1,104 @@
+#include "TextReader.hpp"
+
+#include <spider/compiler/text/utf8.hpp>
+
+#include <stdexcept>
+
+namespace spider {
+
+    // Text Reader //
+
+    int TextReader::nextByte() {
+        const auto raw = getStream().get();
+        // Map the stream's own EOF marker onto -1, the value callers test for.
+        return raw == std::istream::traits_type::eof()
+            ? -1
+            : raw;
+    }
+
+    bool TextReader::nextChar(u32& ch) {
+        // Lead byte: it tells us how many bytes the whole sequence occupies.
+        int n = nextByte();
+        if(n == -1) return false;
+        const isize len = utf8::seqlen(u8(n));
+        if(len == 0) return false; // not a valid UTF-8 lead byte
+
+        // Collect the rest of the sequence; running out of input mid-sequence is a failure.
+        char arr[4];
+        arr[0] = char(n);
+        for(isize i = 1; i < len; i++) {
+            n = nextByte();
+            if(n == -1) return false;
+            arr[i] = char(n);
+        }
+
+        // decode() checks the continuation bytes, which decodeArr() would take on trust.
+        if(utf8::decode(arr, len, ch) == 0) return false;
+        advance(ch);
+        return true;
+    }
+
+    void TextReader::advance(u32 ch) {
+        const bool afterCR = lastWasCR;
+        lastWasCR = (ch == u32('\r'));
+        switch (ch) {
+        case u32('\n'):
+            // The LF of a CRLF pair was already counted when its CR was seen.
+            if (afterCR) break;
+            [[fallthrough]];
+        case u32('\r'):
+            at.line++;
+            at.col = 1;
+            break;
+        default:
+            at.col++;
+            break;
+        }
+    }
+
+    bool TextReader::isEOF() { // looks at the next byte without extracting it
+        return std::istream::traits_type::eof() == getStream().peek();
+    }
+
+    pos TextReader::getPosition() const {
+        return at; // returned by value, so the caller gets a snapshot of the current line/column
+    }
+
+    // File Reader //
+
+    FileTextReader::FileTextReader(const std::string& filename)
+    : fileStream(filename, std::ios::binary) {
+        // Opened in binary mode; an unopenable file is reported by exception.
+        if (fileStream.is_open()) return;
+        throw std::runtime_error("Failed to open file: " + filename);
+    }
+
+    std::istream& FileTextReader::getStream() {
+        return fileStream; // the std::ifstream opened by the constructor
+    }
+
+    // String Reader //
+
+    StringTextReader::StringTextReader(std::string initialText)
+        : buffer(std::move(initialText)) {
+        stringStream = std::make_unique<std::istringstream>(buffer); // stream over the stored text
+    }
+
+    std::istream& StringTextReader::getStream() {
+        return *stringStream; // the stream most recently built by the constructor, set() or append()
+    }
+
+    void StringTextReader::set(const std::string& newText) {
+        stringStream = std::make_unique<std::istringstream>(buffer = newText);
+        at = pos();        // the old line/column described the replaced text, so start over
+        lastWasCR = false;
+    }
+
+    void StringTextReader::append(const std::string& extraText) {
+        stringStream->clear(); // tellg() reports -1 while eofbit/failbit is set, e.g. after reading to the end
+        const std::streampos consumed = stringStream->tellg();
+        stringStream = std::make_unique<std::istringstream>(buffer += extraText);
+        stringStream->seekg(consumed); // resume where the previous stream stopped
+    }
+
+}
diff --git a/src/spider/compiler/text/TextReader.hpp b/src/spider/compiler/text/TextReader.hpp
new file mode 100644
index 0000000..b1d0130
--- /dev/null
+++ b/src/spider/compiler/text/TextReader.hpp
@@ -0,0 +1,91 @@
+#pragma once
+
+#include <spider/compiler/common.hpp>
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <memory>
+
+namespace spider {
+
+    /**
+     * Abstract text reader: decodes UTF-8 characters from a stream supplied by a subclass and tracks line/column.
+     */
+    class TextReader {
+    protected:
+        // Current position; a default-constructed pos is line 1, column 1.
+        pos at;
+        bool lastWasCR = false; // set after a '\r' so that the '\n' of a CRLF pair is not counted again
+
+    public:
+
+        TextReader() = default;
+
+        virtual ~TextReader() = default;
+
+    protected:
+        // Reads one raw byte from getStream(); returns -1 at end of input.
+        int nextByte();
+
+    public:
+        // Decodes the next character into ch and advances the position; false at end of input or on an invalid lead byte.
+        bool nextChar(u32& ch);
+        // True when peeking the stream yields end-of-file.
+        bool isEOF();
+        // Returns a copy of the current position.
+        pos getPosition() const;
+
+    protected:
+        // Updates line/column for one decoded character; CR, LF and CRLF each count as a single line break.
+        void advance(u32 ch);
+        // The byte source; every concrete reader provides its own stream.
+        virtual std::istream& getStream() = 0;
+
+    };
+
+    /**
+     * File Text Reader: a TextReader whose bytes come from a file.
+     */
+    class FileTextReader : public TextReader {
+    private:
+        // Stream over the file named in the constructor.
+        std::ifstream fileStream;
+
+    public:
+        // Opens filename in binary mode; throws std::runtime_error if it cannot be opened.
+        explicit FileTextReader(const std::string& filename);
+
+    protected:
+        // Returns fileStream.
+        std::istream& getStream() override;
+
+    };
+
+    /**
+     * String Text Reader: a TextReader whose bytes come from an in-memory string.
+     */
+    class StringTextReader : public TextReader {
+    private:
+        // buffer holds the full text given so far; stringStream is rebuilt from it by set() and append().
+        std::string buffer;
+        std::unique_ptr<std::istringstream> stringStream;
+
+    public:
+        // Starts reading at the beginning of initialText.
+        explicit StringTextReader(std::string initialText = "");
+
+    public:
+        // Replaces the text and starts a fresh stream at its beginning.
+        void set(const std::string& newText);
+        // Adds extraText to the end of the buffer, rebuilds the stream and seeks it to the offset tellg() reported.
+        void append(const std::string& extraText);
+
+    protected:
+        // Returns the current string stream.
+        std::istream& getStream() override;
+
+    };
+
+}
diff --git a/src/spider/compiler/text/utf8.hpp b/src/spider/compiler/text/utf8.hpp
new file mode 100644
index 0000000..5db2d90
--- /dev/null
+++ b/src/spider/compiler/text/utf8.hpp
@@ -0,0 +1,91 @@
+#pragma once
+
+#include <spider/compiler/common.hpp>
+
+#include <cstdint>
+#include <cstddef>
+#include <string>
+
+namespace spider {
+
+    namespace utf8 {
+
+        // --------------------- //
+        // UTF-8 Sequence Length //
+        // --------------------- //
+
+        constexpr isize seqlen(u8 c) {
+            // Classify the lead byte by its high bits; 0 means it cannot start a sequence.
+            return (c & 0x80) == 0x00 ? 1
+                 : (c & 0xE0) == 0xC0 ? 2
+                 : (c & 0xF0) == 0xE0 ? 3
+                 : (c & 0xF8) == 0xF0 ? 4 : 0;
+        }
+
+        constexpr bool isCont(u8 c) { // continuation bytes look like 10xxxxxx
+            return (c >> 6) == 0x2;
+        }
+
+        constexpr isize isValidSeq(const char* src, isize len) {
+            // Result: byte length of the sequence at src, or 0 if it is not acceptable.
+            const isize need = (len == 0) ? 0 : seqlen(u8(src[0]));
+            if (need == 0 || need > len) return 0;
+            // Every byte after the lead must be a continuation byte.
+            isize k = 1;
+            while (k < need && isCont(u8(src[k]))) ++k;
+            return (k == need) ? need : 0;
+        }
+
+        // ----------------- //
+        // UTF-8 into UTF-32 //
+        // ----------------- //
+        
+        inline isize decode(const char* src, isize len, u32& out) {
+            // Bail out, leaving out untouched, unless isValidSeq() accepts the input.
+            const isize charlen = isValidSeq(src, len);
+            if (charlen == 0) return 0;
+
+            // Payload mask for the lead byte, indexed by sequence length.
+            static constexpr u8 firstMask[5] = {
+                0x00, // unused
+                0x7F, // 0xxxxxxx
+                0x1F, // 110xxxxx
+                0x0F, // 1110xxxx
+                0x07  // 11110xxx
+            };
+
+            // Fold the bytes together: each continuation byte adds six payload bits.
+            const char* p = src;
+            out = u8(*p) & firstMask[charlen];
+            for (isize left = charlen; left > 1; --left) {
+                out = (out << 6) | (u8(*++p) & 0x3F);
+            }
+            return charlen;
+        }
+        
+        /**
+         * Unchecked variant of decode(): the caller guarantees that src holds one
+         * complete, already validated sequence of chlen bytes (chlen indexes a 5-entry table).
+         */
+        inline u32 decodeArr(const char* src, isize chlen) {
+            // Payload mask for the lead byte, indexed by sequence length.
+            static constexpr u8 firstMask[5] = {
+                0x00, // unused
+                0x7F, // 0xxxxxxx
+                0x1F, // 110xxxxx
+                0x0F, // 1110xxxx
+                0x07  // 11110xxx
+            };
+
+            // Fold the bytes together: each continuation byte adds six payload bits.
+            const char* p = src;
+            u32 cp = u8(*p) & firstMask[chlen];
+            for (isize left = chlen; left > 1; --left) {
+                cp = (cp << 6) | (u8(*++p) & 0x3F);
+            }
+            return cp;
+        }
+
+    }
+
+}
diff --git a/src/spider/compiler/tokens/RootToken.cpp b/src/spider/compiler/tokens/RootToken.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/src/spider/compiler/tokens/RootToken.hpp b/src/spider/compiler/tokens/RootToken.hpp
new file mode 100644
index 0000000..e74ed68
--- /dev/null
+++ b/src/spider/compiler/tokens/RootToken.hpp
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <spider/compiler/common.hpp>
+
+namespace spider {
+
+    /**
+     * Defines the root of a token. Holds no state yet.
+     */
+    class RootToken {
+    private:
+
+    public:
+        // Defaulted inline: RootToken.cpp is empty, so out-of-line declarations would never link.
+        RootToken() = default;
+
+        ~RootToken() = default;
+
+    public:
+        // Declared only; nothing defines it yet, so it must not be called.
+        void token();
+
+    };
+
+}
diff --git a/src/spider/compiler/tokens/Token.cpp b/src/spider/compiler/tokens/Token.cpp
new file mode 100644
index 0000000..86b759f
--- /dev/null
+++ b/src/spider/compiler/tokens/Token.cpp
@@ -0,0 +1,26 @@
+#include "Token.hpp"
+
+namespace spider {
+
+    Token::Token(pos _at, TokenType _type, std::string _str) // _str arrives by value, so it is
+    : at(_at), type(_type), str(std::move(_str)) {}           // moved into place, not copied again
+
+    // Member-wise copy of position, type, text and children.
+    Token::Token(const Token& tok) = default;
+
+    // str is const, so it is copied even here; only the child list is actually moved.
+    Token::Token(Token&& tok) = default;
+
+    void Token::append(const Token& tok) { // stores a copy of tok as the last child
+        inner.emplace_back(tok);
+    }
+
+    vector<Token> Token::getInner() { // hands back a copy of the child list
+        return vector<Token>(inner);
+    }
+
+    isize Token::innerCount() { // number of direct children
+        return isize(inner.size());
+    }
+
+}
diff --git a/src/spider/compiler/tokens/Token.hpp b/src/spider/compiler/tokens/Token.hpp
new file mode 100644
index 0000000..27a99d4
--- /dev/null
+++ b/src/spider/compiler/tokens/Token.hpp
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <spider/compiler/common.hpp>
+
+namespace spider {
+
+    /**
+     * Token type. Only the assembly group has enumerators so far.
+     */
+    enum class TokenType {
+        // Assembly
+        PREPROCESSOR_TAG,
+        WHITESPACE, NEWLINE,
+        INSTRUCTION, OPCODE,
+        OPERATOR, OPERAND,
+        REGISTER, NUMBER, BIN_NUMBER, OCT_NUMBER, HEX_NUMBER, ADDR_NUMBER,
+        BRACKET, BRACKET_IND, BRACKET_PTR, BRACKET_IDX, COMMA, COMMENT,
+        SECTION, VARIABLE, ASSIGNMENT
+
+        // Classic (no enumerators yet)
+        // Script (no enumerators yet)
+    };
+
+    /**
+     * Defines a general token: a position, a type and a string, plus a list of child tokens.
+     */
+    class Token {
+    public:
+        // Fixed at construction; all three members are const.
+        const pos at;
+        const TokenType type;
+        const std::string str;
+
+    private:
+        // Child tokens, in the order append() received them.
+        vector<Token> inner;
+
+    public:
+        // Creates a token with no children.
+        Token(pos _at, TokenType _type, std::string _str);
+
+        Token(const Token& tok);
+
+        Token(Token&& tok);
+
+    public:
+        // Adds a copy of tok to the end of the child list.
+        void append(const Token& tok);
+        // Returns a copy of the child list.
+        vector<Token> getInner();
+        // Returns the number of children.
+        isize innerCount();
+
+    };
+
+}