303 lines
11 KiB
Plaintext
303 lines
11 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 95,
|
|
"id": "00e26c5b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from lark import Lark, Transformer\n",
|
|
"import os"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 96,
|
|
"id": "cc16be1a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ebnf_targets = {\n",
|
|
" \"assembly\": {\n",
|
|
" \"src\": \"./samples/assembly.ebnf\",\n",
|
|
" \"dst\": \"./spider/compiler/assembly/AssemblyParser.hpp\",\n",
|
|
" \"cnt\": None,\n",
|
|
" },\n",
|
|
"}\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 97,
|
|
"id": "e88d212f",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"--- Loading EBNF Targets ---\n",
|
|
"✅ Success [assembly]: Loaded './samples/assembly.ebnf' -> Target destination: './spider/compiler/assembly/AssemblyParser.hpp'\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(\"\\n--- Loading EBNF Targets ---\")\n",
|
|
"for target_name, paths in ebnf_targets.items():\n",
|
|
" src_path = paths[\"src\"]\n",
|
|
" dst_path = paths[\"dst\"]\n",
|
|
" \n",
|
|
" try:\n",
|
|
" with open(src_path, \"r\", encoding=\"utf-8\") as file:\n",
|
|
" paths[\"cnt\"] = file.read()\n",
|
|
" print(f\"✅ Success [{target_name}]: Loaded '{src_path}' -> Target destination: '{dst_path}'\")\n",
|
|
" \n",
|
|
" except FileNotFoundError:\n",
|
|
" print(f\"❌ Error [{target_name}]: Source file not found at '{src_path}'\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 98,
|
|
"id": "e8095002",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"from lark import Lark, Transformer\n",
|
|
"\n",
|
|
"iso_ebnf_meta_grammar = r\"\"\"\n",
|
|
" start: rule+\n",
|
|
" rule: RULE_NAME \"=\" expression \";\"\n",
|
|
" \n",
|
|
" ?expression: alternation\n",
|
|
" alternation: sequence (\"|\" sequence)*\n",
|
|
" \n",
|
|
" sequence: item ( [\",\"] item )*\n",
|
|
" \n",
|
|
" ?item: atom\n",
|
|
" | atom \"?\" -> optional\n",
|
|
" | atom \"*\" -> repeat\n",
|
|
" | \"[\" expression \"]\" -> optional\n",
|
|
" | \"{\" expression \"}\" -> repeat\n",
|
|
" \n",
|
|
" ?atom: RULE_NAME -> call_rule\n",
|
|
" | TERMINAL -> match_terminal\n",
|
|
" | SPECIAL_SEQ -> handle_special\n",
|
|
" | \"(\" expression \")\" -> group\n",
|
|
"\n",
|
|
" RULE_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/\n",
|
|
" TERMINAL: /\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"/ | /'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'/\n",
|
|
" SPECIAL_SEQ: /\\?[\\s\\S]*?\\?/\n",
|
|
" COMMENT: /\\(\\*([\\s\\S]*?)\\*\\)/\n",
|
|
"\n",
|
|
" %import common.WS\n",
|
|
" %ignore WS\n",
|
|
" %ignore COMMENT\n",
|
|
"\"\"\"\n",
|
|
"\n",
|
|
"class AssemblyCppGenerator(Transformer):\n",
|
|
" def start(self, rules):\n",
|
|
" cpp_functions = \"\\n\\n\".join(rules)\n",
|
|
" return f\"\"\"#pragma once\n",
|
|
"\n",
|
|
"#include <iostream>\n",
|
|
"#include <string>\n",
|
|
"#include <vector>\n",
|
|
"#include <stdexcept>\n",
|
|
"\n",
|
|
"class AssemblyParser {{\n",
|
|
"private:\n",
|
|
" std::string src;\n",
|
|
" size_t pos = 0;\n",
|
|
"\n",
|
|
" std::string peek_str(size_t len) {{\n",
|
|
" if (pos + len <= src.length()) return src.substr(pos, len);\n",
|
|
" return src.substr(pos);\n",
|
|
" }}\n",
|
|
"\n",
|
|
" char peek() {{ return pos < src.length() ? src[pos] : '\\\\0'; }}\n",
|
|
" \n",
|
|
" void match_char(char expected) {{\n",
|
|
" if (peek() == expected) pos++;\n",
|
|
" else throw std::runtime_error(\"Unexpected token matching character\");\n",
|
|
" }}\n",
|
|
"\n",
|
|
" void match_string(std::string expected) {{\n",
|
|
" if (peek_str(expected.length()) == expected) pos += expected.length();\n",
|
|
" else throw std::runtime_error(\"Unexpected token matching string: \" + expected);\n",
|
|
" }}\n",
|
|
"\n",
|
|
" bool isUTF8Alpha() {{ return isalpha(peek()); }}\n",
|
|
" bool isWhithespaceCharNotCrLf() {{ return peek() == ' ' || peek() == '\\\\t'; }}\n",
|
|
" bool isUTF8CharNotCrLf() {{ return peek() != '\\\\r' && peek() != '\\\\n' && peek() != '\\\\0'; }}\n",
|
|
" bool isUTF8CharLitCont() {{ return peek() != '\\'' && peek() != '\\\\\\\\'; }}\n",
|
|
" bool isUTF8StringLitCont() {{ return peek() != '\"' && peek() != '\\\\\\\\'; }}\n",
|
|
"\n",
|
|
"public:\n",
|
|
" AssemblyParser(std::string input) : src(input) {{}}\n",
|
|
"\n",
|
|
" void parse() {{\n",
|
|
" parse_program(); \n",
|
|
" if (pos < src.length()) throw std::runtime_error(\"Trailing characters left unparsed.\");\n",
|
|
" std::cout << \"Assembly source compiled cleanly!\" << std::endl;\n",
|
|
" }}\n",
|
|
"\n",
|
|
"{cpp_functions}\n",
|
|
"}};\n",
|
|
"\"\"\"\n",
|
|
"\n",
|
|
" def rule(self, args):\n",
|
|
" name, expr = args\n",
|
|
" return f\" void parse_{name}() {{\\n{expr}\\n }}\"\n",
|
|
"\n",
|
|
" # FIX 1: Explicitly handle choice logic using C++ style paths\n",
|
|
" def alternation(self, items):\n",
|
|
" code_lines = []\n",
|
|
" for i, item in enumerate(items):\n",
|
|
" # Clean up padding whitespace if any\n",
|
|
" clean_item = str(item).strip()\n",
|
|
" if not clean_item: continue\n",
|
|
" \n",
|
|
" # Since lookahead processing requires FIRST sets, we scaffold a sequential fallback\n",
|
|
" if i == 0:\n",
|
|
" code_lines.append(f\" if (/* option {i+1} */ true) {{\\n {clean_item}\\n }}\")\n",
|
|
" else:\n",
|
|
" code_lines.append(f\" else if (/* option {i+1} */ true) {{\\n {clean_item}\\n }}\")\n",
|
|
" return \"\\n\".join(code_lines)\n",
|
|
"\n",
|
|
" def sequence(self, items):\n",
|
|
" flattened_items = []\n",
|
|
" for item in items:\n",
|
|
" if isinstance(item, list):\n",
|
|
" for sub_item in item:\n",
|
|
" if sub_item: flattened_items.append(str(sub_item).strip())\n",
|
|
" elif item:\n",
|
|
" flattened_items.append(str(item).strip())\n",
|
|
" return \"\\n\".join(f\" {item}\" for item in flattened_items if item)\n",
|
|
"\n",
|
|
" def call_rule(self, token):\n",
|
|
" rule_name = token[0].value if isinstance(token, list) else token.value\n",
|
|
" return f\"parse_{rule_name}();\"\n",
|
|
"\n",
|
|
" # FIX 2: Generate match_string instead of match_char for multi-char string keywords like \"include\"\n",
|
|
" def match_terminal(self, token):\n",
|
|
" raw_token_str = token[0].value if isinstance(token, list) else token.value\n",
|
|
" raw_val = raw_token_str[1:-1]\n",
|
|
" \n",
|
|
" if raw_val == r\"\\r\": return \"match_char('\\\\r');\"\n",
|
|
" if raw_val == r\"\\n\": return \"match_char('\\\\n');\"\n",
|
|
" if raw_val == r\"\\t\": return \"match_char('\\\\t');\"\n",
|
|
" if raw_val == r\"\\\\\": return \"match_char('\\\\\\\\');\"\n",
|
|
" if not raw_val: return \"// Empty string match\"\n",
|
|
" \n",
|
|
" if len(raw_val) > 1:\n",
|
|
" return f\"match_string(\\\"{raw_val}\\\");\"\n",
|
|
" return f\"match_char('{raw_val}');\"\n",
|
|
"\n",
|
|
" def handle_special(self, token):\n",
|
|
" raw_string = token[0].value if isinstance(token, list) else token.value\n",
|
|
" func_name = raw_string.strip('?').strip()\n",
|
|
" return f\"if ({func_name}()) {{ pos++; }} else {{ throw std::runtime_error(\\\"Failed validation for {func_name}\\\"); }}\"\n",
|
|
"\n",
|
|
" def optional(self, args):\n",
|
|
" content = args[0] if not isinstance(args[0], list) else \"\\n \".join(args[0])\n",
|
|
" return f\"// Optional block\\n if (/* lookahead check */ true) {{\\n {content}\\n }}\"\n",
|
|
"\n",
|
|
" def repeat(self, args):\n",
|
|
" content = args[0] if not isinstance(args[0], list) else \"\\n \".join(args[0])\n",
|
|
" return f\"// Repeat block\\n while (/* lookahead check */ true) {{\\n {content}\\n }}\"\n",
|
|
"\n",
|
|
" def group(self, args):\n",
|
|
" # Flatten grouped elements cleanly to strings\n",
|
|
" if isinstance(args, list):\n",
|
|
" return \"\\n\".join(str(x) for x in args)\n",
|
|
" return str(args)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 99,
|
|
"id": "558915ff",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"--- Starting C++ Compilation Loop ---\n",
|
|
"Parsing and converting target rule sets for: assembly\n",
|
|
"🎉 Code generation complete! Output stored in './spider/compiler/assembly/AssemblyParser.hpp'\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(\"--- Starting C++ Compilation Loop ---\")\n",
|
|
"\n",
|
|
"try:\n",
|
|
" meta_parser = Lark(iso_ebnf_meta_grammar, parser='lalr')\n",
|
|
" \n",
|
|
" for name, target in ebnf_targets.items():\n",
|
|
" print(f\"Parsing and converting target rule sets for: {name}\")\n",
|
|
" \n",
|
|
" # Build the compiler AST tree from your exact text\n",
|
|
" syntax_tree = meta_parser.parse(target[\"cnt\"])\n",
|
|
" \n",
|
|
" # Transform the AST structural nodes into pure C++ Source strings\n",
|
|
" compiler_transformer = AssemblyCppGenerator()\n",
|
|
" compiled_cpp_header = compiler_transformer.transform(syntax_tree)\n",
|
|
" \n",
|
|
" # Output directly to your destination path\n",
|
|
" os.makedirs(os.path.dirname(target[\"dst\"]), exist_ok=True)\n",
|
|
" with open(target[\"dst\"], \"w\", encoding=\"utf-8\") as f:\n",
|
|
" f.write(compiled_cpp_header)\n",
|
|
" \n",
|
|
" print(f\"🎉 Code generation complete! Output stored in '{target['dst']}'\")\n",
|
|
"\n",
|
|
"except Exception as e:\n",
|
|
" print(f\"❌ Failed to process custom architecture. Error details: \\n{e}\")\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "366688c3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cd1aca3f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|