spider-runtime/pygen.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "21877801",
   "metadata": {},
   "source": [
    "## Python Generator\n",
    "\n",
    "This python notebook will serve to generate the necessary code to\n",
    "generate some things from Spider.\n",
    "\n",
    "Specifically, it will generate the CPU instructions (currently)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b0fcd533",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Repo root : ./ -> (True)\n",
      "CPU.hpp   : .//src//spider/runtime/cpu/CPU.hpp -> (True)\n",
      "XLSX      : .//docs//Spider Instructions.xlsx -> (True)\n",
      "Output dir: .//autogen/ -> (True)\n"
     ]
    }
   ],
   "source": [
    "# setup directories\n",
    "import os\n",
    "\n",
    "# [CHANGE]\n",
    "# Since we're running on a local environment (i hope)\n",
    "# we can just signal a relative directory.\n",
    "REPO_ROOT = './'\n",
    "DOCS_ROOT = f'{REPO_ROOT}/docs/'\n",
    "SRC_ROOT = f'{REPO_ROOT}/src/'\n",
    "\n",
    "# Where CPU.hpp lives — this is the file we will inject generated code into.\n",
    "CPU_HPP_PATH = f'{SRC_ROOT}/spider/runtime/cpu/CPU.hpp'\n",
    "\n",
    "# Where the Excel instruction sheet lives. Allocate the .xlsx file in the project's root folder.\n",
    "# NOTE: The file I uploaded has a space instead of underscore!\n",
    "XLSX_PATH = f'{DOCS_ROOT}/Spider Instructions.xlsx'\n",
    "\n",
    "# Output folder for any standalone generated files.\n",
    "OUT_DIR = f'{REPO_ROOT}/autogen/'\n",
    "\n",
    "# Create the output directory if it does not exist yet.\n",
    "# exist_ok=True means no error if it already exists.\n",
    "os.makedirs(OUT_DIR, exist_ok=True)\n",
    "\n",
    "def dir_exists(path:str):\n",
    "    return os.path.exists(path) and os.path.isdir(path)\n",
    "def file_exists(path:str):\n",
    "    return os.path.exists(path) and os.path.isfile(path)\n",
    "\n",
    "print(f'Repo root : {REPO_ROOT   } -> ({ dir_exists(REPO_ROOT   )})')\n",
    "print(f'CPU.hpp   : {CPU_HPP_PATH} -> ({file_exists(CPU_HPP_PATH)})')\n",
    "print(f'XLSX      : {XLSX_PATH   } -> ({file_exists(XLSX_PATH   )})')\n",
    "print(f'Output dir: {OUT_DIR     } -> ({ dir_exists(OUT_DIR     )})')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b33de8ac",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- Sample output for NOP ---\n",
      "        // [System] 0x000 — NOP: No Operation\n",
      "        //   Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n",
      "        //   Operation: Nothing\n",
      "        void NOP();\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Implement here some kind of \"C++\" printer\n",
    "\n",
    "# ── Indent used throughout the generated block ──────────────────────────────\n",
    "INDENT = '        '  # 8 spaces — matches the indentation inside CPU.hpp\n",
    "\n",
    "def format_instruction(byte_code: str, mnemonic: str, name: str,\n",
    "                        group: str, params: int,\n",
    "                        addr_mask_1: str, addr_mask_2: str,\n",
    "                        type_mask: str, operation: str) -> str:\n",
    "    \"\"\"\n",
    "    Returns a single C++ instruction declaration as a string.\n",
    "\n",
    "    Each instruction becomes a commented constant inside the CPU class.\n",
    "    Format:\n",
    "        // [GROUP] 0xBYTE — MNEMONIC: Name\n",
    "        //   Params: N | AddrMask1: XX AddrMask2: XX | TypeMask: XX\n",
    "        //   Operation: ...\n",
    "        MNEMONIC\n",
    "    \"\"\"\n",
    "    lines = []\n",
    "\n",
    "    # Header comment: group, opcode, mnemonic and human-readable name.\n",
    "    lines.append(f'{INDENT}// [{group}] 0x{byte_code} — {mnemonic}: {name}')\n",
    "\n",
    "    # Second comment line: parameter count, addressing masks, type size mask.\n",
    "    lines.append(f'{INDENT}//   Params: {params} | '\n",
    "                 f'AddrMask1: {addr_mask_1} AddrMask2: {addr_mask_2} | '\n",
    "                 f'TypeMask: {type_mask}')\n",
    "\n",
    "    # Third comment line: what this instruction actually does.\n",
    "    lines.append(f'{INDENT}//   Operation: {operation}')\n",
    "\n",
    "    # The declaration itself — just the mnemonic name, matching NOP/SPDR style.\n",
    "    lines.append(f'{INDENT}void {mnemonic}();')          # method declaration inside CPU class  # enum value: NAME = 0xOPCODE,\n",
    "\n",
    "    # Empty line between instructions for readability.\n",
    "    lines.append('')\n",
    "\n",
    "    return '\\n'.join(lines)\n",
    "\n",
    "\n",
    "def format_block(instructions: list) -> str:\n",
    "    \"\"\"\n",
    "    Joins all individual instruction strings into one complete block.\n",
    "    This is the text that will be injected between the pygen-target markers.\n",
    "    \"\"\"\n",
    "    # Join every formatted instruction into one big string.\n",
    "    return '\\n'.join(instructions)\n",
    "\n",
    "\n",
    "# Print what one instruction looks like.\n",
    "sample = format_instruction('000','NOP','No Operation','System',0,'00','00','00','Nothing')\n",
    "print('--- Sample output for NOP ---')\n",
    "print(sample)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "58645013",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Real instructions : 128\n",
      "Reserved slots    : 14\n",
      "Duplicate check   : PASSED\n",
      "\n",
      "Groups found:\n",
      "group\n",
      "Integer           19\n",
      "System            15\n",
      "Bit Wise          14\n",
      "Boolean           12\n",
      "Branch            12\n",
      "Floating Point    10\n",
      "Casts             10\n",
      "Memory             9\n",
      "Trigonometric      7\n",
      "Exponential        6\n",
      "Matrix             6\n",
      "SIMD               5\n",
      "Quaternion         2\n",
      "Easter Eggs        1\n",
      "\n",
      "First 5 instructions:\n",
      "  byte_code mnemonic   group  params addr_mask_1 type_mask\n",
      "0       000      NOP  System       0          00        00\n",
      "1       001     SPDR  System       0          00        00\n",
      "2       002    MMODE  System       1          05        01\n",
      "3       003      INT  System       1          1F        08\n",
      "4       004      LRV  System       1          1F        08\n"
     ]
    }
   ],
   "source": [
    "# read the instruction sheet with pandas\n",
    "\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# -- Load --------------------------------------------------------------------\n",
    "# The data is on the 'Instructions' sheet. Header is on row index 6 (0-based),\n",
    "# so we skip the first 6 rows of decorative merged cells.\n",
    "raw = pd.read_excel(XLSX_PATH, sheet_name='Instructions', header=6)\n",
    "\n",
    "# Rename the two unnamed columns that hold the two addressing mode masks.\n",
    "# In the sheet they appear after 'Acc. Addr. Mode Mask' with no header label.\n",
    "raw.columns = [\n",
    "    'skip_0',       # empty column A\n",
    "    'skip_1',       # 'Base Instr.' label column\n",
    "    'byte_code',    # opcode hex string e.g. '000'\n",
    "    'mnemonic',     # short name e.g. 'NOP'\n",
    "    'name',         # full name e.g. 'No Operation'\n",
    "    'group',        # category e.g. 'System'\n",
    "    'params',       # number of parameters (0, 1, or 2)\n",
    "    'imp',          # addressing mode: Implied\n",
    "    'imm',          # addressing mode: Immediate\n",
    "    'abs',          # addressing mode: Absolute\n",
    "    'reg',          # addressing mode: Register\n",
    "    'ind',          # addressing mode: Indirect\n",
    "    'ptr',          # addressing mode: Pointer\n",
    "    'idx',          # addressing mode: Indexed\n",
    "    'sca',          # addressing mode: Scaled\n",
    "    'dis',          # addressing mode: Displaced\n",
    "    'addr_mask_1',  # accepted addressing mode mask for param 1\n",
    "    'addr_mask_2',  # accepted addressing mode mask for param 2\n",
    "    'ignores_addrm',# whether the instruction ignores addressing modes\n",
    "    'B',            # type size: Byte (1 byte) supported?\n",
    "    'S',            # type size: Short (2 bytes) supported?\n",
    "    'I',            # type size: Int (4 bytes) supported?\n",
    "    'L',            # type size: Long (8 bytes) supported?\n",
    "    'F',            # type size: Float supported?\n",
    "    'D',            # type size: Double supported?\n",
    "    'type_mask',    # combined type size mask as hex string\n",
    "    'expensive',    # marks computationally expensive instructions\n",
    "    'operation',    # human-readable description of what the instruction does\n",
    "    'skip_2',       # trailing empty column\n",
    "]\n",
    "\n",
    "# ── Filter ───────────────────────────────────────────────────────────────────\n",
    "# Keep only rows that have a byte_code value (drops empty rows at the bottom).\n",
    "df = raw[raw['byte_code'].notna()].copy()\n",
    "\n",
    "# Separate reserved slots from real instructions.\n",
    "# Reserved entries have '(reserved)' in the mnemonic column.\n",
    "is_reserved = df['mnemonic'].astype(str).str.contains('reserved', case=False, na=False)\n",
    "reserved_df  = df[is_reserved].copy()   # keep for reference\n",
    "instrs_df    = df[~is_reserved & df['mnemonic'].notna()].copy()  # real instructions only\n",
    "\n",
    "# Skip incomplete entries — rows with no group are placeholder slots (e.g. Int 1-6 Slot)\n",
    "# that have no defined behaviour yet. Keeping them would generate invalid C++ identifiers.\n",
    "instrs_df = instrs_df[instrs_df['group'].notna()].copy()\n",
    "\n",
    "# ── Clean ────────────────────────────────────────────────────────────────────\n",
    "# Fill NaN masks with '00' (means 'no modes accepted' — safe default).\n",
    "instrs_df['addr_mask_1'] = instrs_df['addr_mask_1'].fillna('00').astype(str).str.strip()\n",
    "instrs_df['addr_mask_2'] = instrs_df['addr_mask_2'].fillna('00').astype(str).str.strip()\n",
    "instrs_df['type_mask']   = instrs_df['type_mask'].fillna('00').astype(str).str.strip()\n",
    "instrs_df['params']      = instrs_df['params'].fillna(0).astype(int)\n",
    "instrs_df['name']        = instrs_df['name'].fillna('').astype(str).str.strip()\n",
    "instrs_df['group']       = instrs_df['group'].fillna('Unknown').astype(str).str.strip()\n",
    "instrs_df['operation']   = instrs_df['operation'].fillna('').astype(str).str.strip()\n",
    "\n",
    "# ── Sanitize mnemonics ──────────────────────────────────────────────────────\n",
    "# C++ identifiers cannot contain spaces. Replace spaces with underscores and\n",
    "# convert to uppercase so 'Int 1 Slot' becomes 'INT_1_SLOT'.\n",
    "instrs_df['mnemonic'] = (\n",
    "    instrs_df['mnemonic']\n",
    "    .astype(str)\n",
    "    .str.strip()                  # remove leading/trailing whitespace\n",
    "    .str.replace(' ', '_')        # replace internal spaces with underscores\n",
    "    .str.upper()                  # uppercase for consistency\n",
    ")\n",
    "\n",
    "# ── Validate: duplicate mnemonics ────────────────────────────────────────────\n",
    "# Duplicates in real instruction names would cause C++ compilation errors.\n",
    "# We abort here rather than generating broken code.\n",
    "mnemonic_counts = instrs_df['mnemonic'].value_counts()\n",
    "duplicates = mnemonic_counts[mnemonic_counts > 1]\n",
    "if not duplicates.empty:\n",
    "    # Show which mnemonics are duplicated before raising the error.\n",
    "    raise ValueError(f'Duplicate mnemonics found — fix the sheet before generating:\\n{duplicates}')\n",
    "\n",
    "print(f'Real instructions : {len(instrs_df)}')\n",
    "print(f'Reserved slots    : {len(reserved_df)}')\n",
    "print(f'Duplicate check   : PASSED')\n",
    "print(f'\\nGroups found:')\n",
    "print(instrs_df['group'].value_counts().to_string())\n",
    "print(f'\\nFirst 5 instructions:')\n",
    "print(instrs_df[['byte_code','mnemonic','group','params','addr_mask_1','type_mask']].head().to_string())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "452bc76c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Masks written to: .//autogen/InstructionMasks.hpp\n",
      "Lines generated : 272\n"
     ]
    }
   ],
   "source": [
    "# well, then export the masks (TODO)\n",
    "\n",
    "\n",
    "# ── Build the masks header content ──────────────────────────────────────────\n",
    "lines = []\n",
    "\n",
    "# Standard C++ header guard — prevents the file from being included more than once.\n",
    "lines.append('#pragma once')\n",
    "lines.append('// AUTO-GENERATED by pygen.ipynb — DO NOT EDIT MANUALLY')\n",
    "lines.append('#include <spider/runtime/common.hpp>')\n",
    "lines.append('')\n",
    "lines.append('namespace spider {')\n",
    "lines.append('')\n",
    "\n",
    "# ── Addressing mode mask table ───────────────────────────────────────────────\n",
    "# Each instruction has two masks (one per parameter).\n",
    "# We write them as a constexpr array so the VM can look them up at runtime\n",
    "# using the opcode as the index.\n",
    "lines.append('// Addressing mode masks — indexed by opcode.')\n",
    "lines.append('// [opcode][0] = mask for param 1, [opcode][1] = mask for param 2')\n",
    "lines.append('constexpr u8 ADDR_MODE_MASKS[][2] = {')\n",
    "\n",
    "for _, row in instrs_df.iterrows():\n",
    "    # Convert the hex string mask to an integer for the C++ literal.\n",
    "    m1 = row['addr_mask_1'].replace('.0','').strip()  # remove pandas float artefact\n",
    "    m2 = row['addr_mask_2'].replace('.0','').strip()\n",
    "    m1 = m1 if m1 != 'nan' else '00'\n",
    "    m2 = m2 if m2 != 'nan' else '00'\n",
    "    # Each row: { 0xMASK1, 0xMASK2 }, // MNEMONIC\n",
    "    lines.append(f'    {{ 0x{m1.upper()}, 0x{m2.upper()} }},  // {row[\"mnemonic\"]}')\n",
    "\n",
    "lines.append('};')\n",
    "lines.append('')\n",
    "\n",
    "# ── Type size mask table ─────────────────────────────────────────────────────\n",
    "# A single byte per instruction encoding which type sizes it accepts.\n",
    "lines.append('// Type size masks — indexed by opcode.')\n",
    "lines.append('constexpr u8 TYPE_SIZE_MASKS[] = {')\n",
    "\n",
    "for _, row in instrs_df.iterrows():\n",
    "    tm = str(row['type_mask']).replace('.0','').strip()\n",
    "    tm = tm if tm != 'nan' else '00'\n",
    "    lines.append(f'    0x{tm.upper()},  // {row[\"mnemonic\"]}')\n",
    "\n",
    "lines.append('};')\n",
    "lines.append('')\n",
    "lines.append('} // namespace spider')\n",
    "\n",
    "# ── Write to file ────────────────────────────────────────────────────────────\n",
    "masks_path = os.path.join(OUT_DIR, 'InstructionMasks.hpp')\n",
    "with open(masks_path, 'w', encoding='utf-8') as f:\n",
    "    # Join with Unix line endings only — repo etiquette says no \\r\\n.\n",
    "    f.write('\\n'.join(lines))\n",
    "\n",
    "print(f'Masks written to: {masks_path}')\n",
    "print(f'Lines generated : {len(lines)}')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "5aaebef0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Instructions formatted: 128\n",
      "\n",
      "--- Preview (first 2 instructions) ---\n",
      "        // [System] 0x000 — NOP: No Operation\n",
      "        //   Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n",
      "        //   Operation: Nothing\n",
      "        void NOP();\n",
      "\n",
      "        // [System] 0x001 — SPDR: Will place the Spider version of the interpreter in RA\n",
      "        //   Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n",
      "        //   Operation: (Spider Version) -> RA\n",
      "        void SPDR();\n",
      "\n",
      "\n",
      "CPU.hpp updated successfully at: .//src//spider/runtime/cpu/CPU.hpp\n",
      "Total lines in updated file: 883\n"
     ]
    }
   ],
   "source": [
    "# print the CPU Instructions\n",
    "\n",
    "# ── Generate all instruction declarations ───────────────────────────────────\n",
    "formatted = []\n",
    "\n",
    "for _, row in instrs_df.iterrows():\n",
    "    # Clean each field — remove pandas float artefacts like '00.0'\n",
    "    byte_code   = str(row['byte_code']).strip()\n",
    "    mnemonic    = str(row['mnemonic']).strip()\n",
    "    name        = str(row['name']).strip()\n",
    "    group       = str(row['group']).strip()\n",
    "    params      = int(row['params'])\n",
    "    addr_mask_1 = str(row['addr_mask_1']).replace('.0', '').strip()\n",
    "    addr_mask_2 = str(row['addr_mask_2']).replace('.0', '').strip()\n",
    "    type_mask   = str(row['type_mask']).replace('.0', '').strip()\n",
    "    operation   = str(row['operation']).strip()\n",
    "\n",
    "    # Call the C++ printer from Cell 2 to format this instruction.\n",
    "    formatted.append(format_instruction(\n",
    "        byte_code, mnemonic, name, group,\n",
    "        params, addr_mask_1, addr_mask_2,\n",
    "        type_mask, operation\n",
    "    ))\n",
    "\n",
    "# Combine all declarations into one block string.\n",
    "generated_block = format_block(formatted)\n",
    "\n",
    "print(f'Instructions formatted: {len(formatted)}')\n",
    "print('\\n--- Preview (first 2 instructions) ---')\n",
    "print('\\n'.join(formatted[:2]))\n",
    "\n",
    "# ── Inject into CPU.hpp ──────────────────────────────────────────────────────\n",
    "# The markers tell us exactly where to insert the generated block.\n",
    "MARKER_OPEN  = '// <pygen-target name=cpu-instructions> //'\n",
    "MARKER_CLOSE = '// </pygen-target> //'\n",
    "\n",
    "# Read the current CPU.hpp content.\n",
    "with open(CPU_HPP_PATH, 'r', encoding='utf-8') as f:\n",
    "    original = f.read()\n",
    "\n",
    "# Verify both markers exist before modifying anything.\n",
    "# If either is missing, the file was edited by hand — abort to avoid corruption.\n",
    "if MARKER_OPEN not in original:\n",
    "    raise ValueError(f'Open marker not found in CPU.hpp: {MARKER_OPEN}')\n",
    "if MARKER_CLOSE not in original:\n",
    "    raise ValueError(f'Close marker not found in CPU.hpp: {MARKER_CLOSE}')\n",
    "\n",
    "# Split the file into 3 parts around the pygen-target markers.\n",
    "# before : everything up to and including the open marker\n",
    "# after  : from the close marker onward (including it)\n",
    "before = original[:original.index(MARKER_OPEN) + len(MARKER_OPEN)]\n",
    "after  = original[original.index(MARKER_CLOSE):]\n",
    "\n",
    "# Reassemble: keep before, inject the generated block, then restore after.\n",
    "updated = before + '\\n' + generated_block + '\\n' + INDENT + after\n",
    "\n",
    "# Write back using UTF-8 and Unix line endings only (repo etiquette: no \\r\\n).\n",
    "with open(CPU_HPP_PATH, 'w', encoding='utf-8', newline='\\n') as f:\n",
    "    f.write(updated)\n",
    "\n",
    "print(f'\\nCPU.hpp updated successfully at: {CPU_HPP_PATH}')\n",
    "print(f'Total lines in updated file: {len(updated.splitlines())}')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "instrmap_gen",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "InstrMap.cpp written to: .//src//spider/runtime/instr/InstrMap.cpp\n",
      "  Size          : 34,157 bytes\n",
      "  Array entries : 512 (128 populated, 384 nullptr)\n",
      "  Switch cases  : 128\n",
      "  Line endings  : LF-only verified\n"
     ]
    }
   ],
   "source": [
    "# ── Generate InstrMap.cpp ────────────────────────────────────────────────────\n",
    "# Produces two dispatch implementations in one file:\n",
    "#   1. CPUInstr InstrMap[512] — array of member-function pointers\n",
    "#   2. void CPU::execute(u16) — switch/case version\n",
    "#\n",
    "# Both use UPPERCASE method names matching the mnemonic column.\n",
    "\n",
    "TABLE_SIZE = 512  # 9-bit opcode space\n",
    "\n",
    "# Build opcode -> mnemonic lookup from the cleaned instruction DataFrame.\n",
    "opcode_to_mnem: dict[int, str] = {}\n",
    "opcode_to_name: dict[int, str] = {}\n",
    "opcode_to_group: dict[int, str] = {}\n",
    "\n",
    "for _, row in instrs_df.iterrows():\n",
    "    bc  = str(row['byte_code']).strip()\n",
    "    opc = int(bc, 16)\n",
    "    opcode_to_mnem[opc]  = str(row['mnemonic']).strip()\n",
    "    opcode_to_name[opc]  = str(row['name']).strip()\n",
    "    opcode_to_group[opc] = str(row['group']).strip()\n",
    "\n",
    "# Also track reserved slots for annotation.\n",
    "reserved_opcodes: set[int] = set()\n",
    "for _, row in reserved_df.iterrows():\n",
    "    bc = str(row['byte_code']).strip()\n",
    "    if bc and bc != 'nan':\n",
    "        reserved_opcodes.add(int(bc, 16))\n",
    "\n",
    "# ── Assemble the file ───────────────────────────────────────────────────────\n",
    "L = []\n",
    "L.append('/**')\n",
    "L.append(' * @file InstrMap.cpp')\n",
    "L.append(' * @brief Spider VM instruction dispatch — array and switch implementations.')\n",
    "L.append(' *')\n",
    "L.append(' * AUTO-GENERATED by pygen.ipynb — DO NOT EDIT BY HAND.')\n",
    "L.append(' *')\n",
    "L.append(' * This file provides two equivalent dispatch mechanisms:')\n",
    "L.append(' *')\n",
    "L.append(' *   1. InstrMap[]  — A lookup table of member-function pointers indexed by')\n",
    "L.append(' *                    opcode.  O(1) dispatch; suitable for platforms where')\n",
    "L.append(' *                    indirect calls through function pointers are efficient.')\n",
    "L.append(' *')\n",
    "L.append(' *   2. CPU::execute(u16) — A switch/case over every opcode.  Lets the')\n",
    "L.append(' *                    compiler emit a jump table or branch tree; may be')\n",
    "L.append(' *                    preferable on microcontrollers or when link-time')\n",
    "L.append(' *                    optimisation can inline the handlers.')\n",
    "L.append(' *')\n",
    "L.append(' */')\n",
    "L.append('') # [CHANGE] Use absolute path to make paths more explicit\n",
    "L.append('#include <spider/runtime/cpu/CPU.hpp>')\n",
    "L.append('')\n",
    "L.append('namespace spider {')\n",
    "L.append('')\n",
    "\n",
    "# ── Version 1: Array: ────────────────────────────────────────────────────────\n",
    "L.append('// =============================================================')\n",
    "L.append('//  Version 1 — Lookup table of member-function pointers')\n",
    "L.append('// =============================================================')\n",
    "L.append('')\n",
    "# [CHANGE] Use CPU::Fn Instead\n",
    "#L.append('/** Pointer-to-member type for a zero-argument CPU instruction. */')\n",
    "#L.append('using CPUInstr = void (CPU::*)();')\n",
    "L.append('')\n",
    "L.append('/**')\n",
    "L.append(f' * Instruction dispatch table ({TABLE_SIZE} entries, 9-bit opcode space).')\n",
    "L.append(' *')\n",
    "L.append(' * Usage:')\n",
    "L.append(' *     u16 opcode = fetch();')\n",
    "L.append(' *     CPU::Fn fn = InstrMap[opcode];')\n",
    "L.append(' *     if (fn) (cpu.*fn)();')\n",
    "L.append(' */') # [CHANGE] Made it part of the CPU & avoided explicit size.\n",
    "L.append(f'CPU::Fn CPU::instrMap[] = {{')\n",
    "\n",
    "for opc in range(TABLE_SIZE):\n",
    "    mnem = opcode_to_mnem.get(opc)\n",
    "    if mnem:\n",
    "        name = opcode_to_name[opc]\n",
    "        L.append(f'    &CPU::{mnem + \",\":<28s}// 0x{opc:03X} — {name}')\n",
    "    else:\n",
    "        tag = ''\n",
    "        if opc in reserved_opcodes:\n",
    "            tag = '  (reserved)'\n",
    "        L.append(f'    {\"nullptr,\":<28s}// 0x{opc:03X}{tag}')\n",
    "\n",
    "L.append('};')\n",
    "L.append('')\n",
    "L.append('')\n",
    "\n",
    "# ── Version 2: Switch ──────────────────────────────────────────────────────\n",
    "L.append('// =============================================================')\n",
    "L.append('//  Version 2 — Switch dispatch')\n",
    "L.append('// =============================================================')\n",
    "L.append('')\n",
    "L.append('/**')\n",
    "L.append(' * Execute the instruction identified by @p opcode.')\n",
    "L.append(' *')\n",
    "L.append(' * This is functionally equivalent to the InstrMap[] table above')\n",
    "L.append(' * but expressed as a switch so the compiler can choose the best')\n",
    "L.append(' * lowering strategy (jump table, binary search, etc.).')\n",
    "L.append(' *')\n",
    "L.append(' * @param opcode  9-bit instruction opcode (0x000 - 0x1FF).')\n",
    "L.append(' */')\n",
    "L.append('void CPU::executeSwLk() {')\n",
    "L.append('    switch (_opcode) {')\n",
    "\n",
    "last_group = None\n",
    "for opc in sorted(opcode_to_mnem.keys()):\n",
    "    mnem  = opcode_to_mnem[opc]\n",
    "    group = opcode_to_group[opc]\n",
    "    if group != last_group:\n",
    "        L.append('')\n",
    "        L.append(f'    // ── {group} ' + '─' * max(1, 44 - len(group)))\n",
    "        last_group = group\n",
    "    L.append(f'    case 0x{opc:03X}:  {mnem}();  break;')\n",
    "\n",
    "L.append('')\n",
    "L.append('    default:')\n",
    "L.append('        break;')\n",
    "L.append('    }')\n",
    "L.append('}')\n",
    "L.append('')\n",
    "L.append('} // namespace spider')\n",
    "L.append('')\n",
    "\n",
    "INSTRMAP_SRC = '\\n'.join(L)\n",
    "\n",
    "# ── Write to file ───────────────────────────────────────────────────────────\n",
    "# [CHANGE] Write this in the instructions folder to avoid CPU file bloat\n",
    "INSTRMAP_PATH = f'{SRC_ROOT}/spider/runtime/instr/InstrMap.cpp'\n",
    "\n",
    "with open(INSTRMAP_PATH, 'wb') as f:\n",
    "    f.write(INSTRMAP_SRC.encode('utf-8'))\n",
    "\n",
    "# Verify LF-only\n",
    "with open(INSTRMAP_PATH, 'rb') as f:\n",
    "    raw_bytes = f.read()\n",
    "assert b'\\r' not in raw_bytes, 'CRLF detected in InstrMap.cpp!'\n",
    "\n",
    "array_count  = INSTRMAP_SRC.count('&CPU::')\n",
    "switch_count = INSTRMAP_SRC.count('case 0x')\n",
    "\n",
    "print(f'InstrMap.cpp written to: {INSTRMAP_PATH}')\n",
    "print(f'  Size          : {len(raw_bytes):,} bytes')\n",
    "print(f'  Array entries : {TABLE_SIZE} ({array_count} populated, {TABLE_SIZE - array_count} nullptr)')\n",
    "print(f'  Switch cases  : {switch_count}')\n",
    "print(f'  Line endings  : LF-only verified')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9f190f4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ---------- GENERATE INSTR_XX FILES ----------\n",
    "INSTR_DIR = f'{SRC_ROOT}/spider/runtime/instr'\n",
    "\n",
    "# Generate the files\n",
    "# Each file goes from 00 to 1F (0-31), 32 instr / file\n",
    "for x0 in range(0, TABLE_SIZE, 32):\n",
    "    x1 = x0 + 31\n",
    "    fname = f'{INSTR_DIR}/Instr_{x0:03X}-{x1:03X}.cpp'\n",
    "\n",
    "    # TODO: Check if there are missing instructions\n",
    "    if file_exists(fname):\n",
    "        continue\n",
    "\n",
    "    # CREATE FILE FROM SCRATCH\n",
    "    L = []\n",
    "    L.append('/**')\n",
    "    L.append(' * @brief AUTO-GENERATED by pygen.ipynb BUT editable by hand!')\n",
    "    L.append(' *')\n",
    "    L.append(' */')\n",
    "    L.append('') # [CHANGE] Use absolute path to make paths more explicit\n",
    "    L.append('#include <spider/runtime/cpu/CPU.hpp>')\n",
    "    L.append('')\n",
    "    L.append('namespace spider {')\n",
    "    L.append('')\n",
    "\n",
    "    for opc in range(x0, x1 + 1):\n",
    "        mnem = opcode_to_mnem.get(opc)\n",
    "        if mnem is None: continue\n",
    "\n",
    "        L.append(f'    void CPU::{mnem}() {{')\n",
    "        L.append(f'        // TODO: Implement {mnem}')\n",
    "        L.append(f'    }}')\n",
    "        L.append('')\n",
    "\n",
    "    L.append('}')\n",
    "    L.append('')\n",
    "\n",
    "    with open(fname, 'wb') as f:\n",
    "        f.write('\\n'.join(L).encode('utf-8'))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.14.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}