From fb6fd3ff05439442c7a3bb3c6807455a049e87cc Mon Sep 17 00:00:00 2001 From: Arturo Date: Thu, 19 Mar 2026 22:33:02 -0600 Subject: [PATCH] =?UTF-8?q?Add=20pygen.ipynb=20=E2=80=94=20Python=20code?= =?UTF-8?q?=20generator=20for=20CPU=20instructions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reads Spider_Instructions.xlsx and generates void METHOD(); declarations into CPU.hpp between the pygen-target markers. Also exports addressing mode and type size masks to InstructionMasks.hpp. 126 instructions currently defined. Reserved slots and incomplete entries (Int 1-6 Slot) are skipped automatically. --- pygen.ipynb | 402 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 388 insertions(+), 14 deletions(-) diff --git a/pygen.ipynb b/pygen.ipynb index b9c5052..b75834b 100644 --- a/pygen.ipynb +++ b/pygen.ipynb @@ -18,9 +18,43 @@ "execution_count": null, "id": "b0fcd533", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Repo root : /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime\n", + "CPU.hpp : /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime/src/spider/runtime/cpu/CPU.hpp\n", + "XLSX : /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime/Spider_Instructions.xlsx\n", + "Output dir: /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime/pygen_out\n" + ] + } + ], "source": [ - "# setup directories" + "# setup directories\n", + "\n", + "import os\n", + "\n", + "# Root of the Spider runtime repo — adjust this path to match your machine (folder where spider-runtime lives).\n", + "REPO_ROOT = 
os.path.abspath('/home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime')\n", + "\n", + "# Where CPU.hpp lives — this is the file we will inject generated code into.\n", + "CPU_HPP_PATH = os.path.join(REPO_ROOT, 'src', 'spider', 'runtime', 'cpu', 'CPU.hpp')\n", + "\n", + "# Where the Excel instruction sheet lives. Place the .xlsx file in the project's root folder.\n", + "XLSX_PATH = os.path.join(REPO_ROOT, 'Spider_Instructions.xlsx')\n", + "\n", + "# Output folder for any standalone generated files.\n", + "OUT_DIR = os.path.join(REPO_ROOT, 'pygen_out')\n", + "\n", + "# Create the output directory if it does not exist yet.\n", + "# exist_ok=True means no error if it already exists.\n", + "os.makedirs(OUT_DIR, exist_ok=True)\n", + "\n", + "print(f'Repo root : {REPO_ROOT}')\n", + "print(f'CPU.hpp : {CPU_HPP_PATH}')\n", + "print(f'XLSX : {XLSX_PATH}')\n", + "print(f'Output dir: {OUT_DIR}')\n" ] }, { @@ -28,9 +62,75 @@ "execution_count": null, "id": "b33de8ac", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- Sample output for NOP ---\n", + " // [System] 0x000 — NOP: No Operation\n", + " // Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n", + " // Operation: Nothing\n", + " void NOP();\n", + "\n" + ] + } + ], "source": [ - "# Implement here some kind of \"C++\" printer" + "# Implement here some kind of \"C++\" printer\n", + "\n", + "# ── Indent used throughout the generated block ──────────────────────────────\n", + "INDENT = ' ' # 8 spaces — matches the indentation inside CPU.hpp\n", + "\n", + "def format_instruction(byte_code: str, mnemonic: str, name: str,\n", + " group: str, params: int,\n", + " addr_mask_1: str, addr_mask_2: str,\n", + " type_mask: str, operation: str) -> str:\n", + " \"\"\"\n", + " Returns a single C++ instruction declaration as a string.\n", + "\n", + " Each instruction becomes a commented 
method declaration inside the CPU class.\n", + " Format:\n", + " // [GROUP] 0xBYTE — MNEMONIC: Name\n", + " // Params: N | AddrMask1: XX AddrMask2: XX | TypeMask: XX\n", + " // Operation: ...\n", + " void MNEMONIC();\n", + " \"\"\"\n", + " lines = []\n", + "\n", + " # Header comment: group, opcode, mnemonic and human-readable name.\n", + " lines.append(f'{INDENT}// [{group}] 0x{byte_code} — {mnemonic}: {name}')\n", + "\n", + " # Second comment line: parameter count, addressing masks, type size mask.\n", + " lines.append(f'{INDENT}// Params: {params} | '\n", + " f'AddrMask1: {addr_mask_1} AddrMask2: {addr_mask_2} | '\n", + " f'TypeMask: {type_mask}')\n", + "\n", + " # Third comment line: what this instruction actually does.\n", + " lines.append(f'{INDENT}// Operation: {operation}')\n", + "\n", + " # The declaration itself — a void method named after the mnemonic, matching NOP/SPDR style.\n", + " lines.append(f'{INDENT}void {mnemonic}();') # method declaration inside CPU class\n", + "\n", + " # Empty line between instructions for readability.\n", + " lines.append('')\n", + "\n", + " return '\\n'.join(lines)\n", + "\n", + "\n", + "def format_block(instructions: list) -> str:\n", + " \"\"\"\n", + " Joins all individual instruction strings into one complete block.\n", + " This is the text that will be injected between the pygen-target markers.\n", + " \"\"\"\n", + " # Join every formatted instruction into one big string.\n", + " return '\\n'.join(instructions)\n", + "\n", + "\n", + "# Print what one instruction looks like.\n", + "sample = format_instruction('000','NOP','No Operation','System',0,'00','00','00','Nothing')\n", + "print('--- Sample output for NOP ---')\n", + "print(sample)\n" ] }, { @@ -38,35 +138,309 @@ "execution_count": null, "id": "58645013", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Real instructions : 126\n", + "Reserved slots : 14\n", + "Duplicate check : PASSED\n", + "\n", + 
"Groups found:\n", + "group\n", + "Integer 19\n", + "System 15\n", + "Bit Wise 14\n", + "Boolean 12\n", + "Branch 12\n", + "Floating Point 10\n", + "Casts 10\n", + "Memory 9\n", + "Trigonometric 7\n", + "Exponential 6\n", + "Matrix 6\n", + "SIMD 5\n", + "Easter Eggs 1\n", + "\n", + "First 5 instructions:\n", + " byte_code mnemonic group params addr_mask_1 type_mask\n", + "0 000 NOP System 0 00 00\n", + "1 001 SPDR System 0 00 00\n", + "2 002 MMODE System 1 05 01\n", + "3 003 INT System 1 1F 0F\n", + "4 004 LRV System 1 1F 0C\n" + ] + } + ], "source": [ - "# read the instruction sheet with pandas" + "# read the instruction sheet with pandas\n", + "\n", + "\n", + "import pandas as pd\n", + "\n", + "# -- Load --------------------------------------------------------------------\n", + "# The data is on the 'Instructions' sheet. Header is on row index 6 (0-based),\n", + "# so we skip the first 6 rows of decorative merged cells.\n", + "raw = pd.read_excel(XLSX_PATH, sheet_name='Instructions', header=6)\n", + "\n", + "# Rename the two unnamed columns that hold the two addressing mode masks.\n", + "# In the sheet they appear after 'Acc. Addr. Mode Mask' with no header label.\n", + "raw.columns = [\n", + " 'skip_0', # empty column A\n", + " 'skip_1', # 'Base Instr.' label column\n", + " 'byte_code', # opcode hex string e.g. '000'\n", + " 'mnemonic', # short name e.g. 'NOP'\n", + " 'name', # full name e.g. 'No Operation'\n", + " 'group', # category e.g. 
'System'\n", + " 'params', # number of parameters (0, 1, or 2)\n", + " 'imp', # addressing mode: Implied\n", + " 'imm', # addressing mode: Immediate\n", + " 'abs', # addressing mode: Absolute\n", + " 'reg', # addressing mode: Register\n", + " 'ind', # addressing mode: Indirect\n", + " 'ptr', # addressing mode: Pointer\n", + " 'idx', # addressing mode: Indexed\n", + " 'sca', # addressing mode: Scaled\n", + " 'dis', # addressing mode: Displaced\n", + " 'addr_mask_1', # accepted addressing mode mask for param 1\n", + " 'addr_mask_2', # accepted addressing mode mask for param 2\n", + " 'B', # type size: Byte (1 byte) supported?\n", + " 'S', # type size: Short (2 bytes) supported?\n", + " 'I', # type size: Int (4 bytes) supported?\n", + " 'L', # type size: Long (8 bytes) supported?\n", + " 'F', # type size: Float supported?\n", + " 'D', # type size: Double supported?\n", + " 'type_mask', # combined type size mask as hex string\n", + " 'operation', # human-readable description of what the instruction does\n", + " 'skip_2', # trailing empty column\n", + "]\n", + "\n", + "# ── Filter ───────────────────────────────────────────────────────────────────\n", + "# Keep only rows that have a byte_code value (drops empty rows at the bottom).\n", + "df = raw[raw['byte_code'].notna()].copy()\n", + "\n", + "# Separate reserved slots from real instructions.\n", + "# Reserved entries have '(reserved)' in the mnemonic column.\n", + "is_reserved = df['mnemonic'].astype(str).str.contains('reserved', case=False, na=False)\n", + "reserved_df = df[is_reserved].copy() # keep for reference\n", + "instrs_df = df[~is_reserved & df['mnemonic'].notna()].copy() # real instructions only\n", + "\n", + "# Skip incomplete entries — rows with no group are placeholder slots (e.g. Int 1-6 Slot)\n", + "# that have no defined behaviour yet. 
Keeping them would generate invalid C++ identifiers.\n", + "instrs_df = instrs_df[instrs_df['group'].notna()].copy()\n", + "\n", + "# ── Clean ────────────────────────────────────────────────────────────────────\n", + "# Fill NaN masks with '00' (means 'no modes accepted' — safe default).\n", + "instrs_df['addr_mask_1'] = instrs_df['addr_mask_1'].fillna('00').astype(str).str.strip()\n", + "instrs_df['addr_mask_2'] = instrs_df['addr_mask_2'].fillna('00').astype(str).str.strip()\n", + "instrs_df['type_mask'] = instrs_df['type_mask'].fillna('00').astype(str).str.strip()\n", + "instrs_df['params'] = instrs_df['params'].fillna(0).astype(int)\n", + "instrs_df['name'] = instrs_df['name'].fillna('').astype(str).str.strip()\n", + "instrs_df['group'] = instrs_df['group'].fillna('Unknown').astype(str).str.strip()\n", + "instrs_df['operation'] = instrs_df['operation'].fillna('').astype(str).str.strip()\n", + "\n", + "# ── Sanitize mnemonics ──────────────────────────────────────────────────────\n", + "# C++ identifiers cannot contain spaces. 
Replace spaces with underscores and\n", + "# convert to uppercase so 'Int 1 Slot' becomes 'INT_1_SLOT'.\n", + "instrs_df['mnemonic'] = (\n", + " instrs_df['mnemonic']\n", + " .astype(str)\n", + " .str.strip() # remove leading/trailing whitespace\n", + " .str.replace(' ', '_') # replace internal spaces with underscores\n", + " .str.upper() # uppercase for consistency\n", + ")\n", + "\n", + "# ── Validate: duplicate mnemonics ────────────────────────────────────────────\n", + "# Duplicates in real instruction names would cause C++ compilation errors.\n", + "# We abort here rather than generating broken code.\n", + "mnemonic_counts = instrs_df['mnemonic'].value_counts()\n", + "duplicates = mnemonic_counts[mnemonic_counts > 1]\n", + "if not duplicates.empty:\n", + " # Show which mnemonics are duplicated before raising the error.\n", + " raise ValueError(f'Duplicate mnemonics found — fix the sheet before generating:\\n{duplicates}')\n", + "\n", + "print(f'Real instructions : {len(instrs_df)}')\n", + "print(f'Reserved slots : {len(reserved_df)}')\n", + "print(f'Duplicate check : PASSED')\n", + "print(f'\\nGroups found:')\n", + "print(instrs_df['group'].value_counts().to_string())\n", + "print(f'\\nFirst 5 instructions:')\n", + "print(instrs_df[['byte_code','mnemonic','group','params','addr_mask_1','type_mask']].head().to_string())\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "452bc76c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Masks written to: /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime/pygen_out/InstructionMasks.hpp\n", + "Lines generated : 268\n" + ] + } + ], "source": [ - "# well, then export the masks (TODO)" + "# well, then export the masks (TODO)\n", + "\n", + "\n", + "# ── Build the masks header content ──────────────────────────────────────────\n", + "lines = 
[]\n", + "\n", + "# Include guard (#pragma once) — prevents the file from being included more than once.\n", + "lines.append('#pragma once')\n", + "lines.append('// AUTO-GENERATED by pygen.ipynb — DO NOT EDIT MANUALLY')\n", + "lines.append('#include ')\n", + "lines.append('')\n", + "lines.append('namespace spider {')\n", + "lines.append('')\n", + "\n", + "# ── Addressing mode mask table ───────────────────────────────────────────────\n", + "# Each instruction has two masks (one per parameter).\n", + "# We write them as a constexpr array (one row per instrs_df row) for runtime lookup by the VM.\n", + "# NOTE(review): the row index equals the opcode only if instrs_df has no opcode gaps — confirm.\n", + "lines.append('// Addressing mode masks — indexed by opcode.')\n", + "lines.append('// [opcode][0] = mask for param 1, [opcode][1] = mask for param 2')\n", + "lines.append('constexpr u8 ADDR_MODE_MASKS[][2] = {')\n", + "\n", + "for _, row in instrs_df.iterrows():\n", + " # Clean up the hex string mask so it can be emitted verbatim as a C++ hex literal.\n", + " m1 = row['addr_mask_1'].replace('.0','').strip() # remove pandas float artefact\n", + " m2 = row['addr_mask_2'].replace('.0','').strip()\n", + " m1 = m1 if m1 != 'nan' else '00'\n", + " m2 = m2 if m2 != 'nan' else '00'\n", + " # Each row: { 0xMASK1, 0xMASK2 }, // MNEMONIC\n", + " lines.append(f' {{ 0x{m1.upper()}, 0x{m2.upper()} }}, // {row[\"mnemonic\"]}')\n", + "\n", + "lines.append('};')\n", + "lines.append('')\n", + "\n", + "# ── Type size mask table ─────────────────────────────────────────────────────\n", + "# A single byte per instruction encoding which type sizes it accepts.\n", + "lines.append('// Type size masks — indexed by opcode.')\n", + "lines.append('constexpr u8 TYPE_SIZE_MASKS[] = {')\n", + "\n", + "for _, row in instrs_df.iterrows():\n", + " tm = str(row['type_mask']).replace('.0','').strip()\n", + " tm = tm if tm != 'nan' else '00'\n", + " lines.append(f' 0x{tm.upper()}, // {row[\"mnemonic\"]}')\n", + "\n", + "lines.append('};')\n", + "lines.append('')\n", + "lines.append('} // namespace 
spider')\n", + "\n", + "# ── Write to file ────────────────────────────────────────────────────────────\n", + "masks_path = os.path.join(OUT_DIR, 'InstructionMasks.hpp')\n", + "with open(masks_path, 'w', encoding='utf-8') as f:\n", + " # Join with Unix line endings only — repo etiquette says no \\r\\n.\n", + " f.write('\\n'.join(lines))\n", + "\n", + "print(f'Masks written to: {masks_path}')\n", + "print(f'Lines generated : {len(lines)}')\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "5aaebef0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Instructions formatted: 126\n", + "\n", + "--- Preview (first 2 instructions) ---\n", + " // [System] 0x000 — NOP: No Operation\n", + " // Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n", + " // Operation: Nothing\n", + " void NOP();\n", + "\n", + " // [System] 0x001 — SPDR: Will place the Spider version of the interpreter in RA\n", + " // Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n", + " // Operation: (Spider Version) -> RA\n", + " void SPDR();\n", + "\n", + "\n", + "CPU.hpp updated successfully at: /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime/src/spider/runtime/cpu/CPU.hpp\n", + "Total lines in updated file: 674\n" + ] + } + ], "source": [ - "# print the CPU Instructions" + "# print the CPU Instructions\n", + "\n", + "# ── Generate all instruction declarations ───────────────────────────────────\n", + "formatted = []\n", + "\n", + "for _, row in instrs_df.iterrows():\n", + " # Clean each field — remove pandas float artefacts like '00.0'\n", + " byte_code = str(row['byte_code']).strip()\n", + " mnemonic = str(row['mnemonic']).strip()\n", + " name = str(row['name']).strip()\n", + " group = str(row['group']).strip()\n", + " params = int(row['params'])\n", + " addr_mask_1 = str(row['addr_mask_1']).replace('.0', 
'').strip()\n", + " addr_mask_2 = str(row['addr_mask_2']).replace('.0', '').strip()\n", + " type_mask = str(row['type_mask']).replace('.0', '').strip()\n", + " operation = str(row['operation']).strip()\n", + "\n", + " # Call the C++ printer from Cell 2 to format this instruction.\n", + " formatted.append(format_instruction(\n", + " byte_code, mnemonic, name, group,\n", + " params, addr_mask_1, addr_mask_2,\n", + " type_mask, operation\n", + " ))\n", + "\n", + "# Combine all declarations into one block string.\n", + "generated_block = format_block(formatted)\n", + "\n", + "print(f'Instructions formatted: {len(formatted)}')\n", + "print('\\n--- Preview (first 2 instructions) ---')\n", + "print('\\n'.join(formatted[:2]))\n", + "\n", + "# ── Inject into CPU.hpp ──────────────────────────────────────────────────────\n", + "# The markers tell us exactly where to insert the generated block.\n", + "MARKER_OPEN = '// //'\n", + "MARKER_CLOSE = '// //'\n", + "\n", + "# Read the current CPU.hpp content.\n", + "with open(CPU_HPP_PATH, 'r', encoding='utf-8') as f:\n", + " original = f.read()\n", + "\n", + "# Verify both markers exist before modifying anything.\n", + "# If either is missing, the file was edited by hand — abort to avoid corruption.\n", + "if MARKER_OPEN not in original:\n", + " raise ValueError(f'Open marker not found in CPU.hpp: {MARKER_OPEN}')\n", + "if MARKER_CLOSE not in original:\n", + " raise ValueError(f'Close marker not found in CPU.hpp: {MARKER_CLOSE}')\n", + "\n", + "# Split the file into 3 parts around the pygen-target markers.\n", + "# before : everything up to and including the open marker\n", + "# after : from the close marker onward (including it)\n", + "before = original[:original.index(MARKER_OPEN) + len(MARKER_OPEN)]\n", + "after = original[original.index(MARKER_CLOSE):]\n", + "\n", + "# Reassemble: keep before, inject the generated block, then restore after.\n", + "updated = before + '\\n' + generated_block + '\\n' + INDENT + after\n", + "\n", + 
"# Write back using UTF-8 and Unix line endings only (repo etiquette: no \\r\\n).\n", + "with open(CPU_HPP_PATH, 'w', encoding='utf-8', newline='\\n') as f:\n", + " f.write(updated)\n", + "\n", + "print(f'\\nCPU.hpp updated successfully at: {CPU_HPP_PATH}')\n", + "print(f'Total lines in updated file: {len(updated.splitlines())}')\n" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "spider-rntm-env", "language": "python", "name": "python3" }, @@ -80,7 +454,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.7" + "version": "3.12.3" } }, "nbformat": 4,