Add InstrMap.cpp generation to pygen

This commit is contained in:
Diego De Gante Pérez
2026-03-25 10:15:56 -06:00
parent e24e8dfe2d
commit 291aa0a949
5 changed files with 1187 additions and 257 deletions

View File

@@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 34,
"id": "b0fcd533",
"metadata": {},
"outputs": [
@@ -68,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 35,
"id": "b33de8ac",
"metadata": {},
"outputs": [
@@ -144,7 +144,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 36,
"id": "58645013",
"metadata": {},
"outputs": [
@@ -152,7 +152,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Real instructions : 126\n",
"Real instructions : 128\n",
"Reserved slots : 14\n",
"Duplicate check : PASSED\n",
"\n",
@@ -163,13 +163,14 @@
"Bit Wise 14\n",
"Boolean 12\n",
"Branch 12\n",
"Casts 10\n",
"Floating Point 10\n",
"Casts 10\n",
"Memory 9\n",
"Trigonometric 7\n",
"Exponential 6\n",
"Matrix 6\n",
"SIMD 5\n",
"Quaternion 2\n",
"Easter Eggs 1\n",
"\n",
"First 5 instructions:\n",
@@ -177,8 +178,8 @@
"0 000 NOP System 0 00 00\n",
"1 001 SPDR System 0 00 00\n",
"2 002 MMODE System 1 05 01\n",
"3 003 INT System 1 1F 0F\n",
"4 004 LRV System 1 1F 0C\n"
"3 003 INT System 1 1F 08\n",
"4 004 LRV System 1 1F 08\n"
]
}
],
@@ -214,6 +215,7 @@
" 'dis', # addressing mode: Displaced\n",
" 'addr_mask_1', # accepted addressing mode mask for param 1\n",
" 'addr_mask_2', # accepted addressing mode mask for param 2\n",
" 'ignores_addrm',# whether the instruction ignores addressing modes\n",
" 'B', # type size: Byte (1 byte) supported?\n",
" 'S', # type size: Short (2 bytes) supported?\n",
" 'I', # type size: Int (4 bytes) supported?\n",
@@ -221,6 +223,7 @@
" 'F', # type size: Float supported?\n",
" 'D', # type size: Double supported?\n",
" 'type_mask', # combined type size mask as hex string\n",
" 'expensive', # marks computationally expensive instructions\n",
" 'operation', # human-readable description of what the instruction does\n",
" 'skip_2', # trailing empty column\n",
"]\n",
@@ -280,7 +283,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 37,
"id": "452bc76c",
"metadata": {},
"outputs": [
@@ -289,7 +292,7 @@
"output_type": "stream",
"text": [
"Masks written to: .//autogen/InstructionMasks.hpp\n",
"Lines generated : 268\n"
"Lines generated : 272\n"
]
}
],
@@ -354,7 +357,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 38,
"id": "5aaebef0",
"metadata": {},
"outputs": [
@@ -362,7 +365,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Instructions formatted: 126\n",
"Instructions formatted: 128\n",
"\n",
"--- Preview (first 2 instructions) ---\n",
" // [System] 0x000 — NOP: No Operation\n",
@@ -377,7 +380,7 @@
"\n",
"\n",
"CPU.hpp updated successfully at: .//src//spider/runtime/cpu/CPU.hpp\n",
"Total lines in updated file: 674\n"
"Total lines in updated file: 792\n"
]
}
],
@@ -445,6 +448,171 @@
"print(f'\\nCPU.hpp updated successfully at: {CPU_HPP_PATH}')\n",
"print(f'Total lines in updated file: {len(updated.splitlines())}')\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "instrmap_gen",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"InstrMap.cpp written to: .//src//spider/runtime/cpu/InstrMap.cpp\n",
" Size : 34,246 bytes\n",
" Array entries : 512 (128 populated, 384 nullptr)\n",
" Switch cases : 128\n",
" Line endings : LF-only verified\n"
]
}
],
"source": [
"# ── Generate InstrMap.cpp ────────────────────────────────────────────────────\n",
"# Produces two dispatch implementations in one file:\n",
"# 1. CPUInstr InstrMap[512] — array of member-function pointers\n",
"# 2. void CPU::execute(u16) — switch/case version\n",
"#\n",
"# Both use UPPERCASE method names matching the mnemonic column.\n",
"\n",
"TABLE_SIZE = 512 # 9-bit opcode space\n",
"\n",
"# Build opcode -> mnemonic / name / group lookups from the cleaned instruction DataFrame.\n",
"opcode_to_mnem: dict[int, str] = {}\n",
"opcode_to_name: dict[int, str] = {}\n",
"opcode_to_group: dict[int, str] = {}\n",
"\n",
"for _, row in instrs_df.iterrows():\n",
" bc = str(row['byte_code']).strip()\n",
" opc = int(bc, 16)\n",
" opcode_to_mnem[opc] = str(row['mnemonic']).strip()\n",
" opcode_to_name[opc] = str(row['name']).strip()\n",
" opcode_to_group[opc] = str(row['group']).strip()\n",
"\n",
"# Also track reserved slots for annotation.\n",
"reserved_opcodes: set[int] = set()\n",
"for _, row in reserved_df.iterrows():\n",
" bc = str(row['byte_code']).strip()\n",
" if bc and bc != 'nan':\n",
" reserved_opcodes.add(int(bc, 16))\n",
"\n",
"# ── Assemble the file ───────────────────────────────────────────────────────\n",
"L = []\n",
"L.append('/**')\n",
"L.append(' * @file InstrMap.cpp')\n",
"L.append(' * @brief Spider VM instruction dispatch — array and switch implementations.')\n",
"L.append(' *')\n",
"L.append(' * AUTO-GENERATED by pygen.ipynb — DO NOT EDIT BY HAND.')\n",
"L.append(' *')\n",
"L.append(' * This file provides two equivalent dispatch mechanisms:')\n",
"L.append(' *')\n",
"L.append(' * 1. InstrMap[] — A lookup table of member-function pointers indexed by')\n",
"L.append(' * opcode. O(1) dispatch; suitable for platforms where')\n",
"L.append(' * indirect calls through function pointers are efficient.')\n",
"L.append(' *')\n",
"L.append(' * 2. CPU::execute(u16) — A switch/case over every opcode. Lets the')\n",
"L.append(' * compiler emit a jump table or branch tree; may be')\n",
"L.append(' * preferable on microcontrollers or when link-time')\n",
"L.append(' * optimisation can inline the handlers.')\n",
"L.append(' *')\n",
"L.append(' */')\n",
"L.append('')\n",
"L.append('#include \"CPU.hpp\"')\n",
"L.append('')\n",
"L.append('namespace spider {')\n",
"L.append('')\n",
"\n",
"# ── Version 1: Array ─────────────────────────────────────────────────────────\n",
"L.append('// =============================================================')\n",
"L.append('// Version 1 — Lookup table of member-function pointers')\n",
"L.append('// =============================================================')\n",
"L.append('')\n",
"L.append('/** Pointer-to-member type for a zero-argument CPU instruction. */')\n",
"L.append('using CPUInstr = void (CPU::*)();')\n",
"L.append('')\n",
"L.append('/**')\n",
"L.append(f' * Instruction dispatch table ({TABLE_SIZE} entries, 9-bit opcode space).')\n",
"L.append(' *')\n",
"L.append(' * Usage:')\n",
"L.append(' * u16 opcode = fetch();')\n",
"L.append(' * CPUInstr fn = InstrMap[opcode];')\n",
"L.append(' * if (fn) (cpu.*fn)();')\n",
"L.append(' */')\n",
"L.append(f'CPUInstr InstrMap[{TABLE_SIZE}] = {{')\n",
"\n",
"for opc in range(TABLE_SIZE):\n",
" mnem = opcode_to_mnem.get(opc)\n",
" if mnem:\n",
" name = opcode_to_name[opc]\n",
" L.append(f' &CPU::{mnem + \",\":<28s}// 0x{opc:03X} — {name}')\n",
" else:\n",
" tag = ''\n",
" if opc in reserved_opcodes:\n",
" tag = ' (reserved)'\n",
" L.append(f' {\"nullptr,\":<28s}// 0x{opc:03X}{tag}')\n",
"\n",
"L.append('};')\n",
"L.append('')\n",
"L.append('')\n",
"\n",
"# ── Version 2: Switch ──────────────────────────────────────────────────────\n",
"L.append('// =============================================================')\n",
"L.append('// Version 2 — Switch dispatch')\n",
"L.append('// =============================================================')\n",
"L.append('')\n",
"L.append('/**')\n",
"L.append(' * Execute the instruction identified by @p opcode.')\n",
"L.append(' *')\n",
"L.append(' * This is functionally equivalent to the InstrMap[] table above')\n",
"L.append(' * but expressed as a switch so the compiler can choose the best')\n",
"L.append(' * lowering strategy (jump table, binary search, etc.).')\n",
"L.append(' *')\n",
"L.append(' * @param opcode 9-bit instruction opcode (0x000–0x1FF).')\n",
"L.append(' */')\n",
"L.append('void CPU::execute(u16 opcode) {')\n",
"L.append(' switch (opcode) {')\n",
"\n",
"last_group = None\n",
"for opc in sorted(opcode_to_mnem.keys()):\n",
" mnem = opcode_to_mnem[opc]\n",
" group = opcode_to_group[opc]\n",
" if group != last_group:\n",
" L.append('')\n",
" L.append(f' // ── {group} ' + '─' * max(1, 44 - len(group)))\n",
" last_group = group\n",
" L.append(f' case 0x{opc:03X}: {mnem}(); break;')\n",
"\n",
"L.append('')\n",
"L.append(' default:')\n",
"L.append(' break;')\n",
"L.append(' }')\n",
"L.append('}')\n",
"L.append('')\n",
"L.append('} // namespace spider')\n",
"L.append('')\n",
"\n",
"INSTRMAP_SRC = '\\n'.join(L)\n",
"\n",
"# ── Write to file ───────────────────────────────────────────────────────────\n",
"INSTRMAP_PATH = f'{SRC_ROOT}/spider/runtime/cpu/InstrMap.cpp'\n",
"\n",
"with open(INSTRMAP_PATH, 'wb') as f:\n",
" f.write(INSTRMAP_SRC.encode('utf-8'))\n",
"\n",
"# Verify LF-only\n",
"with open(INSTRMAP_PATH, 'rb') as f:\n",
" raw_bytes = f.read()\n",
"assert b'\\r' not in raw_bytes, 'CRLF detected in InstrMap.cpp!'\n",
"\n",
"array_count = INSTRMAP_SRC.count('&CPU::')\n",
"switch_count = INSTRMAP_SRC.count('case 0x')\n",
"\n",
"print(f'InstrMap.cpp written to: {INSTRMAP_PATH}')\n",
"print(f' Size : {len(raw_bytes):,} bytes')\n",
"print(f' Array entries : {TABLE_SIZE} ({array_count} populated, {TABLE_SIZE - array_count} nullptr)')\n",
"print(f' Switch cases : {switch_count}')\n",
"print(f' Line endings : LF-only verified')\n"
]
}
],
"metadata": {
@@ -463,7 +631,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
"version": "3.10.5"
}
},
"nbformat": 4,