From 9176c4882f2c5e8e8a09c2aaf05afb94b5b3548a Mon Sep 17 00:00:00 2001 From: Kittycannon Date: Sat, 20 Jun 2026 11:53:08 -0600 Subject: [PATCH] beginning of compiler --- .gitignore | 6 + pygen.ipynb | 302 +++++++ samples/assembly.ebnf | 76 ++ samples/factorial.spasm | 11 + spider-compiler.code-workspace | 21 + spider/compiler/assembly/AssemblyParser.hpp | 812 ++++++++++++++++++ src/spider/compiler/Compiler.cpp | 0 src/spider/compiler/Compiler.hpp | 10 + src/spider/compiler/assembler/Assembler.cpp | 35 + src/spider/compiler/assembler/Assembler.hpp | 52 ++ .../compiler/assembler/Disassembler.hpp | 11 + .../compiler/assembler/Dissasembler.cpp | 0 src/spider/compiler/common.hpp | 55 ++ src/spider/compiler/text/TextReader.cpp | 104 +++ src/spider/compiler/text/TextReader.hpp | 91 ++ src/spider/compiler/text/utf8.hpp | 91 ++ src/spider/compiler/tokens/RootToken.cpp | 0 src/spider/compiler/tokens/RootToken.hpp | 25 + src/spider/compiler/tokens/Token.cpp | 26 + src/spider/compiler/tokens/Token.hpp | 56 ++ 20 files changed, 1784 insertions(+) create mode 100644 .gitignore create mode 100644 pygen.ipynb create mode 100644 samples/assembly.ebnf create mode 100644 samples/factorial.spasm create mode 100644 spider-compiler.code-workspace create mode 100644 spider/compiler/assembly/AssemblyParser.hpp create mode 100644 src/spider/compiler/Compiler.cpp create mode 100644 src/spider/compiler/Compiler.hpp create mode 100644 src/spider/compiler/assembler/Assembler.cpp create mode 100644 src/spider/compiler/assembler/Assembler.hpp create mode 100644 src/spider/compiler/assembler/Disassembler.hpp create mode 100644 src/spider/compiler/assembler/Dissasembler.cpp create mode 100644 src/spider/compiler/common.hpp create mode 100644 src/spider/compiler/text/TextReader.cpp create mode 100644 src/spider/compiler/text/TextReader.hpp create mode 100644 src/spider/compiler/text/utf8.hpp create mode 100644 src/spider/compiler/tokens/RootToken.cpp create mode 100644 
src/spider/compiler/tokens/RootToken.hpp create mode 100644 src/spider/compiler/tokens/Token.cpp create mode 100644 src/spider/compiler/tokens/Token.hpp diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f07a0cd --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +# For now, ignore user builds +# We will eventually change to a custom +# build system. +# So hold on +/bin +/out diff --git a/pygen.ipynb b/pygen.ipynb new file mode 100644 index 0000000..cede54d --- /dev/null +++ b/pygen.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 95, + "id": "00e26c5b", + "metadata": {}, + "outputs": [], + "source": [ + "from lark import Lark, Transformer\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "cc16be1a", + "metadata": {}, + "outputs": [], + "source": [ + "ebnf_targets = {\n", + " \"assembly\": {\n", + " \"src\": \"./samples/assembly.ebnf\",\n", + " \"dst\": \"./spider/compiler/assembly/AssemblyParser.hpp\",\n", + " \"cnt\": None,\n", + " },\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "e88d212f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Loading EBNF Targets ---\n", + "✅ Success [assembly]: Loaded './samples/assembly.ebnf' -> Target destination: './spider/compiler/assembly/AssemblyParser.hpp'\n" + ] + } + ], + "source": [ + "print(\"\\n--- Loading EBNF Targets ---\")\n", + "for target_name, paths in ebnf_targets.items():\n", + " src_path = paths[\"src\"]\n", + " dst_path = paths[\"dst\"]\n", + " \n", + " try:\n", + " with open(src_path, \"r\", encoding=\"utf-8\") as file:\n", + " paths[\"cnt\"] = file.read()\n", + " print(f\"✅ Success [{target_name}]: Loaded '{src_path}' -> Target destination: '{dst_path}'\")\n", + " \n", + " except FileNotFoundError:\n", + " print(f\"❌ Error [{target_name}]: Source file not found at '{src_path}'\")\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": 98, + "id": "e8095002", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from lark import Lark, Transformer\n", + "\n", + "iso_ebnf_meta_grammar = r\"\"\"\n", + " start: rule+\n", + " rule: RULE_NAME \"=\" expression \";\"\n", + " \n", + " ?expression: alternation\n", + " alternation: sequence (\"|\" sequence)*\n", + " \n", + " sequence: item ( [\",\"] item )*\n", + " \n", + " ?item: atom\n", + " | atom \"?\" -> optional\n", + " | atom \"*\" -> repeat\n", + " | \"[\" expression \"]\" -> optional\n", + " | \"{\" expression \"}\" -> repeat\n", + " \n", + " ?atom: RULE_NAME -> call_rule\n", + " | TERMINAL -> match_terminal\n", + " | SPECIAL_SEQ -> handle_special\n", + " | \"(\" expression \")\" -> group\n", + "\n", + " RULE_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/\n", + " TERMINAL: /\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"/ | /'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'/\n", + " SPECIAL_SEQ: /\\?[\\s\\S]*?\\?/\n", + " COMMENT: /\\(\\*([\\s\\S]*?)\\*\\)/\n", + "\n", + " %import common.WS\n", + " %ignore WS\n", + " %ignore COMMENT\n", + "\"\"\"\n", + "\n", + "class AssemblyCppGenerator(Transformer):\n", + " def start(self, rules):\n", + " cpp_functions = \"\\n\\n\".join(rules)\n", + " return f\"\"\"#pragma once\n", + "\n", + "#include \n", + "#include \n", + "#include \n", + "#include \n", + "\n", + "class AssemblyParser {{\n", + "private:\n", + " std::string src;\n", + " size_t pos = 0;\n", + "\n", + " std::string peek_str(size_t len) {{\n", + " if (pos + len <= src.length()) return src.substr(pos, len);\n", + " return src.substr(pos);\n", + " }}\n", + "\n", + " char peek() {{ return pos < src.length() ? 
src[pos] : '\\\\0'; }}\n", + " \n", + " void match_char(char expected) {{\n", + " if (peek() == expected) pos++;\n", + " else throw std::runtime_error(\"Unexpected token matching character\");\n", + " }}\n", + "\n", + " void match_string(std::string expected) {{\n", + " if (peek_str(expected.length()) == expected) pos += expected.length();\n", + " else throw std::runtime_error(\"Unexpected token matching string: \" + expected);\n", + " }}\n", + "\n", + " bool isUTF8Alpha() {{ return isalpha(peek()); }}\n", + " bool isWhithespaceCharNotCrLf() {{ return peek() == ' ' || peek() == '\\\\t'; }}\n", + " bool isUTF8CharNotCrLf() {{ return peek() != '\\\\r' && peek() != '\\\\n' && peek() != '\\\\0'; }}\n", + " bool isUTF8CharLitCont() {{ return peek() != '\\'' && peek() != '\\\\\\\\'; }}\n", + " bool isUTF8StringLitCont() {{ return peek() != '\"' && peek() != '\\\\\\\\'; }}\n", + "\n", + "public:\n", + " AssemblyParser(std::string input) : src(input) {{}}\n", + "\n", + " void parse() {{\n", + " parse_program(); \n", + " if (pos < src.length()) throw std::runtime_error(\"Trailing characters left unparsed.\");\n", + " std::cout << \"Assembly source compiled cleanly!\" << std::endl;\n", + " }}\n", + "\n", + "{cpp_functions}\n", + "}};\n", + "\"\"\"\n", + "\n", + " def rule(self, args):\n", + " name, expr = args\n", + " return f\" void parse_{name}() {{\\n{expr}\\n }}\"\n", + "\n", + " # FIX 1: Explicitly handle choice logic using C++ style paths\n", + " def alternation(self, items):\n", + " code_lines = []\n", + " for i, item in enumerate(items):\n", + " # Clean up padding whitespace if any\n", + " clean_item = str(item).strip()\n", + " if not clean_item: continue\n", + " \n", + " # Since lookahead processing requires FIRST sets, we scaffold a sequential fallback\n", + " if i == 0:\n", + " code_lines.append(f\" if (/* option {i+1} */ true) {{\\n {clean_item}\\n }}\")\n", + " else:\n", + " code_lines.append(f\" else if (/* option {i+1} */ true) {{\\n {clean_item}\\n }}\")\n", 
+ " return \"\\n\".join(code_lines)\n", + "\n", + " def sequence(self, items):\n", + " flattened_items = []\n", + " for item in items:\n", + " if isinstance(item, list):\n", + " for sub_item in item:\n", + " if sub_item: flattened_items.append(str(sub_item).strip())\n", + " elif item:\n", + " flattened_items.append(str(item).strip())\n", + " return \"\\n\".join(f\" {item}\" for item in flattened_items if item)\n", + "\n", + " def call_rule(self, token):\n", + " rule_name = token[0].value if isinstance(token, list) else token.value\n", + " return f\"parse_{rule_name}();\"\n", + "\n", + " # FIX 2: Generate match_string instead of match_char for multi-char string keywords like \"include\"\n", + " def match_terminal(self, token):\n", + " raw_token_str = token[0].value if isinstance(token, list) else token.value\n", + " raw_val = raw_token_str[1:-1]\n", + " \n", + " if raw_val == r\"\\r\": return \"match_char('\\\\r');\"\n", + " if raw_val == r\"\\n\": return \"match_char('\\\\n');\"\n", + " if raw_val == r\"\\t\": return \"match_char('\\\\t');\"\n", + " if raw_val == r\"\\\\\": return \"match_char('\\\\\\\\');\"\n", + " if not raw_val: return \"// Empty string match\"\n", + " \n", + " if len(raw_val) > 1:\n", + " return f\"match_string(\\\"{raw_val}\\\");\"\n", + " return f\"match_char('{raw_val}');\"\n", + "\n", + " def handle_special(self, token):\n", + " raw_string = token[0].value if isinstance(token, list) else token.value\n", + " func_name = raw_string.strip('?').strip()\n", + " return f\"if ({func_name}()) {{ pos++; }} else {{ throw std::runtime_error(\\\"Failed validation for {func_name}\\\"); }}\"\n", + "\n", + " def optional(self, args):\n", + " content = args[0] if not isinstance(args[0], list) else \"\\n \".join(args[0])\n", + " return f\"// Optional block\\n if (/* lookahead check */ true) {{\\n {content}\\n }}\"\n", + "\n", + " def repeat(self, args):\n", + " content = args[0] if not isinstance(args[0], list) else \"\\n \".join(args[0])\n", + " return 
f\"// Repeat block\\n while (/* lookahead check */ true) {{\\n {content}\\n }}\"\n", + "\n", + " def group(self, args):\n", + " # Flatten grouped elements cleanly to strings\n", + " if isinstance(args, list):\n", + " return \"\\n\".join(str(x) for x in args)\n", + " return str(args)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "558915ff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- Starting C++ Compilation Loop ---\n", + "Parsing and converting target rule sets for: assembly\n", + "🎉 Code generation complete! Output stored in './spider/compiler/assembly/AssemblyParser.hpp'\n" + ] + } + ], + "source": [ + "print(\"--- Starting C++ Compilation Loop ---\")\n", + "\n", + "try:\n", + " meta_parser = Lark(iso_ebnf_meta_grammar, parser='lalr')\n", + " \n", + " for name, target in ebnf_targets.items():\n", + " print(f\"Parsing and converting target rule sets for: {name}\")\n", + " \n", + " # Build the compiler AST tree from your exact text\n", + " syntax_tree = meta_parser.parse(target[\"cnt\"])\n", + " \n", + " # Transform the AST structural nodes into pure C++ Source strings\n", + " compiler_transformer = AssemblyCppGenerator()\n", + " compiled_cpp_header = compiler_transformer.transform(syntax_tree)\n", + " \n", + " # Output directly to your destination path\n", + " os.makedirs(os.path.dirname(target[\"dst\"]), exist_ok=True)\n", + " with open(target[\"dst\"], \"w\", encoding=\"utf-8\") as f:\n", + " f.write(compiled_cpp_header)\n", + " \n", + " print(f\"🎉 Code generation complete! Output stored in '{target['dst']}'\")\n", + "\n", + "except Exception as e:\n", + " print(f\"❌ Failed to process custom architecture. 
Error details: \\n{e}\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "366688c3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd1aca3f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/samples/assembly.ebnf b/samples/assembly.ebnf new file mode 100644 index 0000000..849401e --- /dev/null +++ b/samples/assembly.ebnf @@ -0,0 +1,76 @@ +(* Spider Assembly EBNF | Sintek Analytics @ 2026 | All Rights Reserved *) + +(* Characters & Structures *) +letter = ? isUTF8Alpha ? ; +digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; +alpha_num_char = letter | digit ; + +hex_digit = digit | "A" | "B" | "C" | "D" | "E" | "F" | "a" | "b" | "c" | "d" | "e" | "f" ; +octal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" ; +binary_digit = "0" | "1" ; + +ws_char = ? isWhithespaceCharNotCrLf ? ; +ws_optional = { ws_char } ; +whitespace = ws_char , { ws_char } ; +newline = "\r" | "\n" | "\r\n" ; +utf8_char = ? isUTF8CharNotCrLf ? ; + +char_escape = "\\", utf8_char ; +char_content = char_escape | ? isUTF8CharLitCont ? ; (* Not ' or \ *) +char_lit = "'", char_content, "'" ; + +string_char = char_escape | ? isUTF8StringLitCont ? 
; (* Not " or \ *) +string_lit = '"', { string_char }, '"' ; + +(* Literals *) +identifier = ( letter | "_" ) , { alpha_num_char | "_" } ; +comment = ";" , { utf8_char } ; + +sign = "+" | "-" ; +exponent_marker = "e" | "E" ; +exponent = exponent_marker , [ sign ] , digit , { digit } ; + +decimal_lit = [ sign ] , digit , { digit } , [ "B" | "S" | "I" | "L" ] ; +float_lit = [ sign ] , ( + ( digit , { digit } , "." , digit , { digit } , [ exponent ] ) | + ( "." , digit , { digit } , [ exponent ] ) | + ( digit , { digit } , exponent ) + ) , [ "F" | "D" ] ; + +hex_lit = [ sign ] , "0x" , hex_digit , { hex_digit } ; +octal_lit = [ sign ] , "0c" , octal_digit , { octal_digit } ; +binary_lit = [ sign ] , "0b" , binary_digit , { binary_digit } ; + +literal = decimal_lit | float_lit | hex_lit | octal_lit | binary_lit | string_lit | char_lit ; +literal_cast = ("B" | "S" | "I" | "L" | "F" | "D"), ws_optional, "(", ws_optional, literal, ws_optional, ")" ; +literal_decl = literal | literal_cast ; + +(* Operands *) +register = "R" , alpha_num_char , alpha_num_char ; +addrm_ind = "[", ws_optional, literal_decl, ws_optional, "]" ; +addrm_ptr = "[", ws_optional, register, ws_optional, "]" ; +addrm_idx = "[", ws_optional, register, ws_optional, "+", ws_optional, literal_decl, ws_optional, "]"; +addrm_sca = "[", ws_optional, register, ws_optional, "+", register, ws_optional, "*", ws_optional, literal_decl, ws_optional, "]"; +addrm_dis = "[", ws_optional, register, ws_optional, "+", register, ws_optional, "*", ws_optional, literal_decl, ws_optional, "+", ws_optional, literal_decl, ws_optional, "]"; +addr_modes = addrm_ind | addrm_ptr | addrm_idx | addrm_sca | addrm_dis ; +operand = register | identifier | literal_decl | addr_modes ; + +(* Generalized Instructions *) +opcode = letter , { alpha_num_char } ; +operand_list = operand , { "," , ws_optional , operand } ; +instruction = opcode , [ whitespace , operand_list ] ; + +(* Added Preprocessor, Sections, and Metadata Syntaxes *) 
+include_decl = "include", whitespace, string_lit ; +annotation_oper = identifier, [ ws_optional, "=", ws_optional, literal_decl ] ; +annotation_ops = annotation_oper , { ws_optional, "," , ws_optional , annotation_oper } ; +annotation_args = "(", ws_optional, annotation_ops, ws_optional, ")" ; +annotation = "@", identifier, [ annotation_args ] ; +section_decl = "section", whitespace, ".", identifier ; + +(* Line Structure *) +label = identifier, ":" ; +line_content = include_decl | section_decl | ( [ annotation, whitespace ], [ label, ws_optional ], [ instruction ] ) ; +line = ws_optional, [ line_content ], ws_optional , [ comment ] , newline ; +line_last = ws_optional, [ line_content ], ws_optional , [ comment ] ; +program = { line }, [ line_last ] ; diff --git a/samples/factorial.spasm b/samples/factorial.spasm new file mode 100644 index 0000000..be12b4d --- /dev/null +++ b/samples/factorial.spasm @@ -0,0 +1,11 @@ +@asm +.data +.code +MOV RA, 1 +MOV RB, 8 ; Input number +:loop_start +MUL RA, RB +NOT RB ; RB != 0? Updates equal flag +DEC RB ; RB -= 1 +JEQ loop_start ; If equal flag, goto loop_start +; End program, result in RA diff --git a/spider-compiler.code-workspace b/spider-compiler.code-workspace new file mode 100644 index 0000000..07b9241 --- /dev/null +++ b/spider-compiler.code-workspace @@ -0,0 +1,21 @@ +{ + "folders": [ + { + "path": "." 
+        }
+    ],
+    "settings": {
+        "gitlens.remotes": [
+            {
+                "domain": "git.sintekanalytics.com",
+                "type": "Gitea",
+                "name": "Sintek Analytics' Git",
+                "protocol": "https"
+            }
+        ],
+        "C_Cpp.default.includePath": [
+            "./src"
+        ],
+        "terminal.integrated.defaultProfile.windows": "MSYS2 UCRT"
+    }
+}
\ No newline at end of file
diff --git a/spider/compiler/assembly/AssemblyParser.hpp b/spider/compiler/assembly/AssemblyParser.hpp
new file mode 100644
index 0000000..09fd1e0
--- /dev/null
+++ b/spider/compiler/assembly/AssemblyParser.hpp
@@ -0,0 +1,812 @@
+#pragma once
+
+#include <cctype>     // NOTE(review): the four header names were lost in this copy of the patch; restored from usage - confirm
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
+class AssemblyParser {  // Character-level parser over `src`; pygen.ipynb writes this header from samples/assembly.ebnf
+private:
+    std::string src;   // complete input text
+    size_t pos = 0;    // index of the next unread byte in src
+
+    std::string peek_str(size_t len) {  // up to `len` bytes starting at pos (shorter near end of input)
+        if (pos + len <= src.length()) return src.substr(pos, len);
+        return src.substr(pos);
+    }
+
+    char peek() { return pos < src.length() ? src[pos] : '\0'; }  // next byte, or '\0' at end of input
+
+    void match_char(char expected) {  // consume `expected`, or throw std::runtime_error
+        if (peek() == expected) pos++;
+        else throw std::runtime_error("Unexpected token matching character");
+    }
+
+    void match_string(const std::string& expected) {  // consume the literal `expected`, or throw std::runtime_error
+        if (peek_str(expected.length()) == expected) pos += expected.length();
+        else throw std::runtime_error("Unexpected token matching string: " + expected);
+    }
+    // Single-byte predicates on peek(); the generated rules advance pos by one when a predicate holds.
+    bool isUTF8Alpha() { return std::isalpha(static_cast<unsigned char>(peek())) != 0; }  // cast: a negative char is UB for isalpha
+    bool isWhithespaceCharNotCrLf() { return peek() == ' ' || peek() == '\t'; }
+    bool isUTF8CharNotCrLf() { return peek() != '\r' && peek() != '\n' && peek() != '\0'; }
+    bool isUTF8CharLitCont() { return peek() != '\'' && peek() != '\\' && peek() != '\0'; }    // '\0': never step past end of input
+    bool isUTF8StringLitCont() { return peek() != '"' && peek() != '\\' && peek() != '\0'; }   // '\0': never step past end of input
+
+public:
+    AssemblyParser(std::string input) : src(input) {}
+
+    void parse() {  // parse a whole program; throws std::runtime_error on a mismatch or on leftover input
+        parse_program();
+        if (pos < src.length()) throw std::runtime_error("Trailing characters left unparsed.");
+        std::cout << "Assembly source compiled cleanly!"
<< std::endl; + } + + void parse_letter() { + if (/* option 1 */ true) { + if (isUTF8Alpha()) { pos++; } else { throw std::runtime_error("Failed validation for isUTF8Alpha"); } + } + } + + void parse_digit() { + if (/* option 1 */ true) { + match_char('0'); + } else if (/* option 2 */ true) { + match_char('1'); + } else if (/* option 3 */ true) { + match_char('2'); + } else if (/* option 4 */ true) { + match_char('3'); + } else if (/* option 5 */ true) { + match_char('4'); + } else if (/* option 6 */ true) { + match_char('5'); + } else if (/* option 7 */ true) { + match_char('6'); + } else if (/* option 8 */ true) { + match_char('7'); + } else if (/* option 9 */ true) { + match_char('8'); + } else if (/* option 10 */ true) { + match_char('9'); + } + } + + void parse_alpha_num_char() { + if (/* option 1 */ true) { + parse_letter(); + } else if (/* option 2 */ true) { + parse_digit(); + } + } + + void parse_hex_digit() { + if (/* option 1 */ true) { + parse_digit(); + } else if (/* option 2 */ true) { + match_char('A'); + } else if (/* option 3 */ true) { + match_char('B'); + } else if (/* option 4 */ true) { + match_char('C'); + } else if (/* option 5 */ true) { + match_char('D'); + } else if (/* option 6 */ true) { + match_char('E'); + } else if (/* option 7 */ true) { + match_char('F'); + } else if (/* option 8 */ true) { + match_char('a'); + } else if (/* option 9 */ true) { + match_char('b'); + } else if (/* option 10 */ true) { + match_char('c'); + } else if (/* option 11 */ true) { + match_char('d'); + } else if (/* option 12 */ true) { + match_char('e'); + } else if (/* option 13 */ true) { + match_char('f'); + } + } + + void parse_octal_digit() { + if (/* option 1 */ true) { + match_char('0'); + } else if (/* option 2 */ true) { + match_char('1'); + } else if (/* option 3 */ true) { + match_char('2'); + } else if (/* option 4 */ true) { + match_char('3'); + } else if (/* option 5 */ true) { + match_char('4'); + } else if (/* option 6 */ true) { + 
match_char('5'); + } else if (/* option 7 */ true) { + match_char('6'); + } else if (/* option 8 */ true) { + match_char('7'); + } + } + + void parse_binary_digit() { + if (/* option 1 */ true) { + match_char('0'); + } else if (/* option 2 */ true) { + match_char('1'); + } + } + + void parse_ws_char() { + if (/* option 1 */ true) { + if (isWhithespaceCharNotCrLf()) { pos++; } else { throw std::runtime_error("Failed validation for isWhithespaceCharNotCrLf"); } + } + } + + void parse_ws_optional() { + if (/* option 1 */ true) { + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_ws_char(); + } + } + } + } + + void parse_whitespace() { + if (/* option 1 */ true) { + parse_ws_char(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_ws_char(); + } + } + } + } + + void parse_newline() { + if (/* option 1 */ true) { + match_char('\r'); + } else if (/* option 2 */ true) { + match_char('\n'); + } else if (/* option 3 */ true) { + match_string("\r\n"); + } + } + + void parse_utf8_char() { + if (/* option 1 */ true) { + if (isUTF8CharNotCrLf()) { pos++; } else { throw std::runtime_error("Failed validation for isUTF8CharNotCrLf"); } + } + } + + void parse_char_escape() { + if (/* option 1 */ true) { + match_char('\\'); + parse_utf8_char(); + } + } + + void parse_char_content() { + if (/* option 1 */ true) { + parse_char_escape(); + } else if (/* option 2 */ true) { + if (isUTF8CharLitCont()) { pos++; } else { throw std::runtime_error("Failed validation for isUTF8CharLitCont"); } + } + } + + void parse_char_lit() { + if (/* option 1 */ true) { + match_char('\''); + parse_char_content(); + match_char('\''); + } + } + + void parse_string_char() { + if (/* option 1 */ true) { + parse_char_escape(); + } else if (/* option 2 */ true) { + if (isUTF8StringLitCont()) { pos++; } else { throw std::runtime_error("Failed validation for isUTF8StringLitCont"); } + } + } + + void parse_string_lit() { + if 
(/* option 1 */ true) { + match_char('"'); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_string_char(); + } + } + match_char('"'); + } + } + + void parse_identifier() { + if (/* option 1 */ true) { + if (/* option 1 */ true) { + parse_letter(); + } else if (/* option 2 */ true) { + match_char('_'); + } + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_alpha_num_char(); + } else if (/* option 2 */ true) { + match_char('_'); + } + } + } + } + + void parse_comment() { + if (/* option 1 */ true) { + match_char(';'); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_utf8_char(); + } + } + } + } + + void parse_sign() { + if (/* option 1 */ true) { + match_char('+'); + } else if (/* option 2 */ true) { + match_char('-'); + } + } + + void parse_exponent_marker() { + if (/* option 1 */ true) { + match_char('e'); + } else if (/* option 2 */ true) { + match_char('E'); + } + } + + void parse_exponent() { + if (/* option 1 */ true) { + parse_exponent_marker(); + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_sign(); + } + } + parse_digit(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_digit(); + } + } + } + } + + void parse_decimal_lit() { + if (/* option 1 */ true) { + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_sign(); + } + } + parse_digit(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_digit(); + } + } + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + match_char('B'); + } else if (/* option 2 */ true) { + match_char('S'); + } else if (/* option 3 */ true) { + match_char('I'); + } else if (/* option 4 */ true) { + match_char('L'); + } + } + } + } + + void parse_float_lit() { + if (/* option 1 */ true) { + // Optional 
block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_sign(); + } + } + if (/* option 1 */ true) { + if (/* option 1 */ true) { + parse_digit(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_digit(); + } + } + match_char('.'); + parse_digit(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_digit(); + } + } + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_exponent(); + } + } + } + } else if (/* option 2 */ true) { + if (/* option 1 */ true) { + match_char('.'); + parse_digit(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_digit(); + } + } + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_exponent(); + } + } + } + } else if (/* option 3 */ true) { + if (/* option 1 */ true) { + parse_digit(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_digit(); + } + } + parse_exponent(); + } + } + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + match_char('F'); + } else if (/* option 2 */ true) { + match_char('D'); + } + } + } + } + + void parse_hex_lit() { + if (/* option 1 */ true) { + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_sign(); + } + } + match_string("0x"); + parse_hex_digit(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_hex_digit(); + } + } + } + } + + void parse_octal_lit() { + if (/* option 1 */ true) { + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_sign(); + } + } + match_string("0c"); + parse_octal_digit(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_octal_digit(); + } + } + } + } + + void parse_binary_lit() { + if (/* option 1 */ true) { + 
// Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_sign(); + } + } + match_string("0b"); + parse_binary_digit(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_binary_digit(); + } + } + } + } + + void parse_literal() { + if (/* option 1 */ true) { + parse_decimal_lit(); + } else if (/* option 2 */ true) { + parse_float_lit(); + } else if (/* option 3 */ true) { + parse_hex_lit(); + } else if (/* option 4 */ true) { + parse_octal_lit(); + } else if (/* option 5 */ true) { + parse_binary_lit(); + } else if (/* option 6 */ true) { + parse_string_lit(); + } else if (/* option 7 */ true) { + parse_char_lit(); + } + } + + void parse_literal_cast() { + if (/* option 1 */ true) { + if (/* option 1 */ true) { + match_char('B'); + } else if (/* option 2 */ true) { + match_char('S'); + } else if (/* option 3 */ true) { + match_char('I'); + } else if (/* option 4 */ true) { + match_char('L'); + } else if (/* option 5 */ true) { + match_char('F'); + } else if (/* option 6 */ true) { + match_char('D'); + } + parse_ws_optional(); + match_char('('); + parse_ws_optional(); + parse_literal(); + parse_ws_optional(); + match_char(')'); + } + } + + void parse_literal_decl() { + if (/* option 1 */ true) { + parse_literal(); + } else if (/* option 2 */ true) { + parse_literal_cast(); + } + } + + void parse_register() { + if (/* option 1 */ true) { + match_char('R'); + parse_alpha_num_char(); + parse_alpha_num_char(); + } + } + + void parse_addrm_ind() { + if (/* option 1 */ true) { + match_char('['); + parse_ws_optional(); + parse_literal_decl(); + parse_ws_optional(); + match_char(']'); + } + } + + void parse_addrm_ptr() { + if (/* option 1 */ true) { + match_char('['); + parse_ws_optional(); + parse_register(); + parse_ws_optional(); + match_char(']'); + } + } + + void parse_addrm_idx() { + if (/* option 1 */ true) { + match_char('['); + parse_ws_optional(); + parse_register(); + parse_ws_optional(); 
+ match_char('+'); + parse_ws_optional(); + parse_literal_decl(); + parse_ws_optional(); + match_char(']'); + } + } + + void parse_addrm_sca() { + if (/* option 1 */ true) { + match_char('['); + parse_ws_optional(); + parse_register(); + parse_ws_optional(); + match_char('+'); + parse_register(); + parse_ws_optional(); + match_char('*'); + parse_ws_optional(); + parse_literal_decl(); + parse_ws_optional(); + match_char(']'); + } + } + + void parse_addrm_dis() { + if (/* option 1 */ true) { + match_char('['); + parse_ws_optional(); + parse_register(); + parse_ws_optional(); + match_char('+'); + parse_register(); + parse_ws_optional(); + match_char('*'); + parse_ws_optional(); + parse_literal_decl(); + parse_ws_optional(); + match_char('+'); + parse_ws_optional(); + parse_literal_decl(); + parse_ws_optional(); + match_char(']'); + } + } + + void parse_addr_modes() { + if (/* option 1 */ true) { + parse_addrm_ind(); + } else if (/* option 2 */ true) { + parse_addrm_ptr(); + } else if (/* option 3 */ true) { + parse_addrm_idx(); + } else if (/* option 4 */ true) { + parse_addrm_sca(); + } else if (/* option 5 */ true) { + parse_addrm_dis(); + } + } + + void parse_operand() { + if (/* option 1 */ true) { + parse_register(); + } else if (/* option 2 */ true) { + parse_identifier(); + } else if (/* option 3 */ true) { + parse_literal_decl(); + } else if (/* option 4 */ true) { + parse_addr_modes(); + } + } + + void parse_opcode() { + if (/* option 1 */ true) { + parse_letter(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_alpha_num_char(); + } + } + } + } + + void parse_operand_list() { + if (/* option 1 */ true) { + parse_operand(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + match_char(','); + parse_ws_optional(); + parse_operand(); + } + } + } + } + + void parse_instruction() { + if (/* option 1 */ true) { + parse_opcode(); + // Optional block + if (/* lookahead check */ true) 
{ + if (/* option 1 */ true) { + parse_whitespace(); + parse_operand_list(); + } + } + } + } + + void parse_include_decl() { + if (/* option 1 */ true) { + match_string("include"); + parse_whitespace(); + parse_string_lit(); + } + } + + void parse_annotation_oper() { + if (/* option 1 */ true) { + parse_identifier(); + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_ws_optional(); + match_char('='); + parse_ws_optional(); + parse_literal_decl(); + } + } + } + } + + void parse_annotation_ops() { + if (/* option 1 */ true) { + parse_annotation_oper(); + // Repeat block + while (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_ws_optional(); + match_char(','); + parse_ws_optional(); + parse_annotation_oper(); + } + } + } + } + + void parse_annotation_args() { + if (/* option 1 */ true) { + match_char('('); + parse_ws_optional(); + parse_annotation_ops(); + parse_ws_optional(); + match_char(')'); + } + } + + void parse_annotation() { + if (/* option 1 */ true) { + match_char('@'); + parse_identifier(); + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_annotation_args(); + } + } + } + } + + void parse_section_decl() { + if (/* option 1 */ true) { + match_string("section"); + parse_whitespace(); + match_char('.'); + parse_identifier(); + } + } + + void parse_label() { + if (/* option 1 */ true) { + parse_identifier(); + match_char(':'); + } + } + + void parse_line_content() { + if (/* option 1 */ true) { + parse_include_decl(); + } else if (/* option 2 */ true) { + parse_section_decl(); + } else if (/* option 3 */ true) { + if (/* option 1 */ true) { + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_annotation(); + parse_whitespace(); + } + } + // Optional block + if (/* lookahead check */ true) { + if (/* option 1 */ true) { + parse_label(); + parse_ws_optional(); + } + } + // Optional block + if (/* lookahead check */ true) 
{
+                    if (/* option 1 */ true) {
+                        parse_instruction();
+                    }
+                }
+            }
+        }
+    }
+
+    void parse_line() {
+        if (/* option 1 */ true) {
+            parse_ws_optional();
+            // Optional block
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_line_content();
+                }
+            }
+            parse_ws_optional();
+            // Optional block
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_comment();
+                }
+            }
+            parse_newline();
+        }
+    }
+
+    void parse_line_last() {
+        if (/* option 1 */ true) {
+            parse_ws_optional();
+            // Optional block
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_line_content();
+                }
+            }
+            parse_ws_optional();
+            // Optional block
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_comment();
+                }
+            }
+        }
+    }
+
+    void parse_program() {
+        if (/* option 1 */ true) {
+            // Repeat block
+            while (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_line();
+                }
+            }
+            // Optional block
+            if (/* lookahead check */ true) {
+                if (/* option 1 */ true) {
+                    parse_line_last();
+                }
+            }
+        }
+    }
+};
diff --git a/src/spider/compiler/Compiler.cpp b/src/spider/compiler/Compiler.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/src/spider/compiler/Compiler.hpp b/src/spider/compiler/Compiler.hpp
new file mode 100644
index 0000000..740b5cf
--- /dev/null
+++ b/src/spider/compiler/Compiler.hpp
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <spider/compiler/common.hpp>  // NOTE(review): include target was lost in this copy of the patch - confirm
+
+namespace spider {
+
+    class Token;
+    class RootToken;
+
+}
diff --git a/src/spider/compiler/assembler/Assembler.cpp b/src/spider/compiler/assembler/Assembler.cpp
new file mode 100644
index 0000000..7fa45fc
--- /dev/null
+++ b/src/spider/compiler/assembler/Assembler.cpp
@@ -0,0 +1,35 @@
+#include "Assembler.hpp"
+
+namespace spider {
+
+    Assembler::Assembler() {}
+
+    Assembler::~Assembler() {}
+
+    Assembler::Error Assembler::loadFile(const fs::path& path) {
+        // check if path exists; this must come before canonical(), which throws on a missing path
+        if(!fs::exists(path)) return Error::FILE_NOT_FOUND;
+        fs::path abs_path = fs::canonical(path);
+
+        // check if recursive: the canonical path is in fstack while that file is still being loaded
+        if(fstack.contains(abs_path)) return Error::FILE_RECURSIVE_LOAD;
+        auto ir = fstack.insert(abs_path);
+
+        // Actually load! (make_unique allocates the reader itself; no raw `new`)
+        levels.emplace_back(Level {
+            .reader = std::make_unique<FileTextReader>(abs_path.string()),  // NOTE(review): assumes FileTextReader derives from TextReader - confirm
+            .source = abs_path.string(),
+        });
+        parseCurrentLevel();
+
+        // alright! this file is no longer "in progress"
+        fstack.erase(ir.first);
+        return Error::SUCCESS;
+    }
+
+    void Assembler::parseCurrentLevel() {
+        auto& lvl = levels.back();
+
+    }
+
+}
diff --git a/src/spider/compiler/assembler/Assembler.hpp b/src/spider/compiler/assembler/Assembler.hpp
new file mode 100644
index 0000000..25ef9c0
--- /dev/null
+++ b/src/spider/compiler/assembler/Assembler.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <spider/compiler/common.hpp>            // NOTE(review): the three include targets were lost in this copy of the patch - confirm
+
+#include <spider/compiler/text/TextReader.hpp>
+
+#include <spider/compiler/tokens/RootToken.hpp>
+
+namespace spider {
+
+    /**
+     * The spider assembler, capable of
+     * converting text into bytecode.
+     */
+    class Assembler {
+    public:
+        enum class Error {
+            SUCCESS,
+            FILE_NOT_FOUND, FILE_RECURSIVE_LOAD,
+        };
+        struct Level {
+            uptr<TextReader> reader;  // NOTE(review): element type was lost in this copy; loadFile() stores a FileTextReader here - confirm
+            RootToken root;
+            std::string source;       // canonical path of the file, as a string
+        };
+
+    public:
+
+        set<fs::path> fstack;   // canonical paths of the files currently inside loadFile()
+        deque<Level> levels;    // loadFile() appends one Level per file it loads
+
+    public:
+
+        Assembler();
+
+        ~Assembler();
+
+    public:
+
+        /**
+         * Loads and parses a file. Returns FILE_NOT_FOUND if the path does not exist,
+         * FILE_RECURSIVE_LOAD if that file is already being loaded, SUCCESS otherwise.
+         */
+        Error loadFile(const fs::path& path);
+
+    private:
+
+        void parseCurrentLevel();
+
+    };
+
+}
diff --git a/src/spider/compiler/assembler/Disassembler.hpp b/src/spider/compiler/assembler/Disassembler.hpp
new file mode 100644
index 0000000..d98c1f1
--- /dev/null
+++ b/src/spider/compiler/assembler/Disassembler.hpp
@@ -0,0 +1,11 @@
+#pragma once
+
+namespace spider {
+
+    /**
+     * A disassembler, capable of converting bytecode into
+     * readable text.
namespace spider {

    // Standard containers and vocabulary types used unqualified in the project
    using std::deque;
    using std::map;
    using std::optional;
    using std::set;
    using std::vector;

    // Short names for the standard smart pointers
    template <class T> using ptr = std::shared_ptr<T>;
    template <class T> using uptr = std::unique_ptr<T>;

    // Short name for the standard filesystem library
    namespace fs = std::filesystem;

    // Fixed-width integers, unsigned then signed
    using u8 = std::uint8_t;
    using u16 = std::uint16_t;
    using u32 = std::uint32_t;
    using u64 = std::uint64_t;

    using i8 = std::int8_t;
    using i16 = std::int16_t;
    using i32 = std::int32_t;
    using i64 = std::int64_t;

    // Floating point types
    // TODO: SPIDER_EMULATE_FLOAT will control f32
    using f32 = float;
    using f64 = double;

    // The floating point aliases promise an exact width
    // TODO: Check if we're on C++23, there is already stdfloat
    static_assert(sizeof(f32) == 4, "The f32 type must be exactly 4 bytes.");
    static_assert(sizeof(f64) == 8, "The f64 type must be exactly 8 bytes.");

    // Size / index type (same type as std::size_t)
    using isize = std::size_t;

    /**
     * A line/column pair; both members default to 1.
     */
    struct pos {
        isize line;
        isize col;

        pos(isize line = 1, isize col = 1)
            : line{line}, col{col} {}
    };

}
== -1) return false; + + isize len = utf8::seqlen(u8(n)); + if(len == 0) return false; + + isize i = 1; + char arr[4]; + arr[0] = char(n); + + while(i < len) { + n = nextByte(); + if(n == -1) return false; + arr[i++] = char(n); + } + + ch = utf8::decodeArr(arr, len); + advance(ch); + return true; + } + + void TextReader::advance(u32 ch) { + if (ch == u32('\n')) { + if (lastWasCR) { + lastWasCR = false; // Mixed CRLF handling + } else { + at.line++; + at.col = 1; + } + } else if (ch == u32('\r')) { + at.line++; + at.col = 1; + lastWasCR = true; + } else { + at.col++; + lastWasCR = false; + } + } + + bool TextReader::isEOF() { + return getStream().peek() == std::istream::traits_type::eof(); + } + + pos TextReader::getPosition() const { + return at; + } + + // File Reader // + + FileTextReader::FileTextReader(const std::string& filename) + : fileStream(filename, std::ios::binary) { + if (!fileStream.is_open()) { + throw std::runtime_error("Failed to open file: " + filename); + } + } + + std::istream& FileTextReader::getStream() { + return fileStream; + } + + // String Reader // + + StringTextReader::StringTextReader(std::string initialText) + : buffer(std::move(initialText)), + stringStream(std::make_unique(buffer)) { + } + + std::istream& StringTextReader::getStream() { + return *stringStream; + } + + void StringTextReader::set(const std::string& newText) { + buffer = newText; + stringStream = std::make_unique(buffer); + lastWasCR = false; + } + + void StringTextReader::append(const std::string& extraText) { + std::streampos pos = stringStream->tellg(); + buffer += extraText; + stringStream = std::make_unique(buffer); + stringStream->seekg(pos); + } + +} diff --git a/src/spider/compiler/text/TextReader.hpp b/src/spider/compiler/text/TextReader.hpp new file mode 100644 index 0000000..b1d0130 --- /dev/null +++ b/src/spider/compiler/text/TextReader.hpp @@ -0,0 +1,91 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include + +namespace spider { + 
+ /** + * Abstract Text Reader + */ + class TextReader { + protected: + + pos at; + bool lastWasCR = false; + + public: + + TextReader() = default; + + virtual ~TextReader() = default; + + protected: + + int nextByte(); + + public: + + bool nextChar(u32& ch); + + bool isEOF(); + + pos getPosition() const; + + protected: + + void advance(u32 ch); + + virtual std::istream& getStream() = 0; + + }; + + /** + * File Text Reader + */ + class FileTextReader : public TextReader { + private: + + std::ifstream fileStream; + + public: + + explicit FileTextReader(const std::string& filename); + + protected: + + std::istream& getStream() override; + + }; + + /** + * String Text Reader + */ + class StringTextReader : public TextReader { + private: + + std::string buffer; + std::unique_ptr stringStream; + + public: + + explicit StringTextReader(std::string initialText = ""); + + public: + + void set(const std::string& newText); + + void append(const std::string& extraText); + + protected: + + std::istream& getStream() override; + + }; + +} diff --git a/src/spider/compiler/text/utf8.hpp b/src/spider/compiler/text/utf8.hpp new file mode 100644 index 0000000..5db2d90 --- /dev/null +++ b/src/spider/compiler/text/utf8.hpp @@ -0,0 +1,91 @@ +#pragma once + +#include + +#include +#include +#include + +namespace spider { + + namespace utf8 { + + // --------------------- // + // UTF-8 Sequence Length // + // --------------------- // + + constexpr isize seqlen(u8 c) { + if ((c & 0x80) == 0x00) return 1; + if ((c & 0xE0) == 0xC0) return 2; + if ((c & 0xF0) == 0xE0) return 3; + if ((c & 0xF8) == 0xF0) return 4; + return 0; + } + + constexpr bool isCont(u8 c) { + return (c & 0xC0) == 0x80; + } + + constexpr isize isValidSeq(const char* src, isize len) { + if (len == 0) return 0; + isize m = seqlen(u8(src[0])); + if (m == 0 || m > len) return 0; + for (isize i = 1; i < m; i++) { + if (!isCont(u8(src[i]))) return 0; + } + return m; + } + + // ----------------- // + // UTF-8 into UTF-32 // + // 
----------------- // + + inline isize decode(const char* src, isize len, u32& out) { + // check input is valid + isize charlen = isValidSeq(src, len); + if (charlen == 0) return 0; + + // map of masks, starts at 1 + static constexpr u8 firstMask[5] = { + 0x00, // unused + 0x7F, // 0xxxxxxx + 0x1F, // 110xxxxx + 0x0F, // 1110xxxx + 0x07 // 11110xxx + }; + + // assemble the char + out = u8(src[0]) & firstMask[charlen]; + for (isize i = 1; i < charlen; ++i) { + out <<= 6; + out |= u8(src[i]) & 0x3F; + } + return charlen; + } + + /** + * A simpler version, which consider it already + * having a validated input array + */ + inline u32 decodeArr(const char* src, isize chlen) { + // map of masks, starts at 1 + static constexpr u8 firstMask[5] = { + 0x00, // unused + 0x7F, // 0xxxxxxx + 0x1F, // 110xxxxx + 0x0F, // 1110xxxx + 0x07 // 11110xxx + }; + + // assemble the char + u32 out = u8(src[0]) & firstMask[chlen]; + for (isize i = 1; i < chlen; ++i) { + out <<= 6; + out |= u8(src[i]) & 0x3F; + } + return out; + } + + } + +} diff --git a/src/spider/compiler/tokens/RootToken.cpp b/src/spider/compiler/tokens/RootToken.cpp new file mode 100644 index 0000000..e69de29 diff --git a/src/spider/compiler/tokens/RootToken.hpp b/src/spider/compiler/tokens/RootToken.hpp new file mode 100644 index 0000000..e74ed68 --- /dev/null +++ b/src/spider/compiler/tokens/RootToken.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace spider { + + /** + * Defines the root of a token. 
+ */ + class RootToken { + private: + + public: + + RootToken(); + + ~RootToken(); + + public: + + void token(); + + }; + +} diff --git a/src/spider/compiler/tokens/Token.cpp b/src/spider/compiler/tokens/Token.cpp new file mode 100644 index 0000000..86b759f --- /dev/null +++ b/src/spider/compiler/tokens/Token.cpp @@ -0,0 +1,26 @@ +#include "Token.hpp" + +namespace spider { + + Token::Token(pos _at, TokenType _type, std::string _str) + : at(_at), type(_type), str(_str) {} + + Token::Token(const Token& tok) + : at(tok.at), type(tok.type), str(tok.str), inner(tok.inner) {} + + Token::Token(Token&& tok) + : at(tok.at), type(tok.type), str(tok.str), inner(std::move(tok.inner)) {} + + void Token::append(const Token& tok) { + inner.push_back(tok); + } + + vector Token::getInner() { + return inner; + } + + isize Token::innerCount() { + return inner.size(); + } + +} diff --git a/src/spider/compiler/tokens/Token.hpp b/src/spider/compiler/tokens/Token.hpp new file mode 100644 index 0000000..27a99d4 --- /dev/null +++ b/src/spider/compiler/tokens/Token.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include + +namespace spider { + + /** + * Token type. + */ + enum class TokenType { + // Assembly + PREPROCESSOR_TAG, + WHITESPACE, NEWLINE, + INSTRUCTION, OPCODE, + OPERATOR, OPERAND, + REGISTER, NUMBER, BIN_NUMBER, OCT_NUMBER, HEX_NUMBER, ADDR_NUMBER, + BRACKET, BRACKET_IND, BRACKET_PTR, BRACKET_IDX, COMMA, COMMENT, + SECTION, VARIABLE, ASSIGNMENT + + // Classic + // Script + }; + + /** + * Defines a general token. + */ + class Token { + public: + + const pos at; + const TokenType type; + const std::string str; + + private: + + vector inner; + + public: + + Token(pos _at, TokenType _type, std::string _str); + + Token(const Token& tok); + + Token(Token&& tok); + + public: + + void append(const Token& tok); + + vector getInner(); + + isize innerCount(); + + }; + +}