690 lines
30 KiB
Plaintext
690 lines
30 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "21877801",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Python Generator\n",
|
|
"\n",
|
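|
"This notebook generates C++ source code for the Spider VM from the\n",
|
|
"instruction spreadsheet (`docs/Spider Instructions.xlsx`).\n",
|
|
"\n",
|
|
"Specifically, it currently generates the CPU instruction declarations, the\n",
|
|
"instruction mask tables, the dispatch map and the per-range instruction stubs."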
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "b0fcd533",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Repo root : ./ -> (True)\n",
|
|
"CPU.hpp : .//src//spider/runtime/cpu/CPU.hpp -> (True)\n",
|
|
"XLSX : .//docs//Spider Instructions.xlsx -> (True)\n",
|
|
"Output dir: .//autogen/ -> (True)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# setup directories\n",
|
|
"import os\n",
|
|
"\n",
|
|
"# [CHANGE]\n",
|
|
"# Since we're running on a local environment (i hope)\n",
|
|
"# we can just signal a relative directory.\n",
|
|
"REPO_ROOT = './'\n",
|
|
"DOCS_ROOT = f'{REPO_ROOT}/docs/'\n",
|
|
"SRC_ROOT = f'{REPO_ROOT}/src/'\n",
|
|
"\n",
|
|
"# Where CPU.hpp lives — this is the file we will inject generated code into.\n",
|
|
"CPU_HPP_PATH = f'{SRC_ROOT}/spider/runtime/cpu/CPU.hpp'\n",
|
|
"\n",
|
|
"# Where the Excel instruction sheet lives. Place the .xlsx file in the project's docs/ folder.\n",
|
|
"# NOTE: The file I uploaded has a space instead of underscore!\n",
|
|
"XLSX_PATH = f'{DOCS_ROOT}/Spider Instructions.xlsx'\n",
|
|
"\n",
|
|
"# Output folder for any standalone generated files.\n",
|
|
"OUT_DIR = f'{REPO_ROOT}/autogen/'\n",
|
|
"\n",
|
|
"# Create the output directory if it does not exist yet.\n",
|
|
"# exist_ok=True means no error if it already exists.\n",
|
|
"os.makedirs(OUT_DIR, exist_ok=True)\n",
|
|
"\n",
|
|
"def dir_exists(path:str):\n",
|
|
" return os.path.exists(path) and os.path.isdir(path)\n",
|
|
"def file_exists(path:str):\n",
|
|
" return os.path.exists(path) and os.path.isfile(path)\n",
|
|
"\n",
|
|
"print(f'Repo root : {REPO_ROOT } -> ({ dir_exists(REPO_ROOT )})')\n",
|
|
"print(f'CPU.hpp : {CPU_HPP_PATH} -> ({file_exists(CPU_HPP_PATH)})')\n",
|
|
"print(f'XLSX : {XLSX_PATH } -> ({file_exists(XLSX_PATH )})')\n",
|
|
"print(f'Output dir: {OUT_DIR } -> ({ dir_exists(OUT_DIR )})')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "b33de8ac",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"--- Sample output for NOP ---\n",
|
|
" // [System] 0x000 — NOP: No Operation\n",
|
|
" // Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n",
|
|
" // Operation: Nothing\n",
|
|
" void NOP();\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Implement here some kind of \"C++\" printer\n",
|
|
"\n",
|
|
"# ── Indent used throughout the generated block ──────────────────────────────\n",
|
|
"INDENT = ' ' # 8 spaces — matches the indentation inside CPU.hpp\n",
|
|
"\n",
|
|
"def format_instruction(byte_code: str, mnemonic: str, name: str,\n",
|
|
" group: str, params: int,\n",
|
|
" addr_mask_1: str, addr_mask_2: str,\n",
|
|
" type_mask: str, operation: str) -> str:\n",
|
|
" \"\"\"\n",
|
|
" Returns a single C++ instruction declaration as a string.\n",
|
|
"\n",
|
|
" Each instruction becomes a commented method declaration inside the CPU class.\n",
|
|
" Format:\n",
|
|
" // [GROUP] 0xBYTE — MNEMONIC: Name\n",
|
|
" // Params: N | AddrMask1: XX AddrMask2: XX | TypeMask: XX\n",
|
|
" // Operation: ...\n",
|
|
" void MNEMONIC();\n",
|
|
" \"\"\"\n",
|
|
" lines = []\n",
|
|
"\n",
|
|
" # Header comment: group, opcode, mnemonic and human-readable name.\n",
|
|
" lines.append(f'{INDENT}// [{group}] 0x{byte_code} — {mnemonic}: {name}')\n",
|
|
"\n",
|
|
" # Second comment line: parameter count, addressing masks, type size mask.\n",
|
|
" lines.append(f'{INDENT}// Params: {params} | '\n",
|
|
" f'AddrMask1: {addr_mask_1} AddrMask2: {addr_mask_2} | '\n",
|
|
" f'TypeMask: {type_mask}')\n",
|
|
"\n",
|
|
" # Third comment line: what this instruction actually does.\n",
|
|
" lines.append(f'{INDENT}// Operation: {operation}')\n",
|
|
"\n",
|
|
" # The declaration itself — just the mnemonic name, matching NOP/SPDR style.\n",
|
|
" lines.append(f'{INDENT}void {mnemonic}();') # method declaration inside the CPU class\n",
|
|
"\n",
|
|
" # Empty line between instructions for readability.\n",
|
|
" lines.append('')\n",
|
|
"\n",
|
|
" return '\\n'.join(lines)\n",
|
|
"\n",
|
|
"\n",
|
|
"def format_block(instructions: list) -> str:\n",
|
|
" \"\"\"\n",
|
|
" Joins all individual instruction strings into one complete block.\n",
|
|
" This is the text that will be injected between the pygen-target markers.\n",
|
|
" \"\"\"\n",
|
|
" # Join every formatted instruction into one big string.\n",
|
|
" return '\\n'.join(instructions)\n",
|
|
"\n",
|
|
"\n",
|
|
"# Print what one instruction looks like.\n",
|
|
"sample = format_instruction('000','NOP','No Operation','System',0,'00','00','00','Nothing')\n",
|
|
"print('--- Sample output for NOP ---')\n",
|
|
"print(sample)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "58645013",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Real instructions : 128\n",
|
|
"Reserved slots : 14\n",
|
|
"Duplicate check : PASSED\n",
|
|
"\n",
|
|
"Groups found:\n",
|
|
"group\n",
|
|
"Integer 19\n",
|
|
"System 15\n",
|
|
"Bit Wise 14\n",
|
|
"Boolean 12\n",
|
|
"Branch 12\n",
|
|
"Floating Point 10\n",
|
|
"Casts 10\n",
|
|
"Memory 9\n",
|
|
"Trigonometric 7\n",
|
|
"Exponential 6\n",
|
|
"Matrix 6\n",
|
|
"SIMD 5\n",
|
|
"Quaternion 2\n",
|
|
"Easter Eggs 1\n",
|
|
"\n",
|
|
"First 5 instructions:\n",
|
|
" byte_code mnemonic group params addr_mask_1 type_mask\n",
|
|
"0 000 NOP System 0 00 00\n",
|
|
"1 001 SPDR System 0 00 00\n",
|
|
"2 002 MMODE System 1 05 01\n",
|
|
"3 003 INT System 1 1F 08\n",
|
|
"4 004 LRV System 1 1F 08\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# read the instruction sheet with pandas\n",
|
|
"\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"# -- Load --------------------------------------------------------------------\n",
|
|
"# The data is on the 'Instructions' sheet. Header is on row index 6 (0-based),\n",
|
|
"# so we skip the first 6 rows of decorative merged cells.\n",
|
|
"raw = pd.read_excel(XLSX_PATH, sheet_name='Instructions', header=6)\n",
|
|
"\n",
|
|
"# Rename every column by position; this also names the two unnamed columns that hold the addressing mode masks.\n",
|
|
"# In the sheet they appear after 'Acc. Addr. Mode Mask' with no header label.\n",
|
|
"raw.columns = [\n",
|
|
" 'skip_0', # empty column A\n",
|
|
" 'skip_1', # 'Base Instr.' label column\n",
|
|
" 'byte_code', # opcode hex string e.g. '000'\n",
|
|
" 'mnemonic', # short name e.g. 'NOP'\n",
|
|
" 'name', # full name e.g. 'No Operation'\n",
|
|
" 'group', # category e.g. 'System'\n",
|
|
" 'params', # number of parameters (0, 1, or 2)\n",
|
|
" 'imp', # addressing mode: Implied\n",
|
|
" 'imm', # addressing mode: Immediate\n",
|
|
" 'abs', # addressing mode: Absolute\n",
|
|
" 'reg', # addressing mode: Register\n",
|
|
" 'ind', # addressing mode: Indirect\n",
|
|
" 'ptr', # addressing mode: Pointer\n",
|
|
" 'idx', # addressing mode: Indexed\n",
|
|
" 'sca', # addressing mode: Scaled\n",
|
|
" 'dis', # addressing mode: Displaced\n",
|
|
" 'addr_mask_1', # accepted addressing mode mask for param 1\n",
|
|
" 'addr_mask_2', # accepted addressing mode mask for param 2\n",
|
|
" 'ignores_addrm',# whether the instruction ignores addressing modes\n",
|
|
" 'B', # type size: Byte (1 byte) supported?\n",
|
|
" 'S', # type size: Short (2 bytes) supported?\n",
|
|
" 'I', # type size: Int (4 bytes) supported?\n",
|
|
" 'L', # type size: Long (8 bytes) supported?\n",
|
|
" 'F', # type size: Float supported?\n",
|
|
" 'D', # type size: Double supported?\n",
|
|
" 'type_mask', # combined type size mask as hex string\n",
|
|
" 'expensive', # marks computationally expensive instructions\n",
|
|
" 'operation', # human-readable description of what the instruction does\n",
|
|
" 'skip_2', # trailing empty column\n",
|
|
"]\n",
|
|
"\n",
|
|
"# ── Filter ───────────────────────────────────────────────────────────────────\n",
|
|
"# Keep only rows that have a byte_code value (drops empty rows at the bottom).\n",
|
|
"df = raw[raw['byte_code'].notna()].copy()\n",
|
|
"\n",
|
|
"# Separate reserved slots from real instructions.\n",
|
|
"# Reserved entries have '(reserved)' in the mnemonic column.\n",
|
|
"is_reserved = df['mnemonic'].astype(str).str.contains('reserved', case=False, na=False)\n",
|
|
"reserved_df = df[is_reserved].copy() # keep for reference\n",
|
|
"instrs_df = df[~is_reserved & df['mnemonic'].notna()].copy() # real instructions only\n",
|
|
"\n",
|
|
"# Skip incomplete entries — rows with no group are placeholder slots (e.g. Int 1-6 Slot)\n",
|
|
"# that have no defined behaviour yet. Keeping them would generate invalid C++ identifiers.\n",
|
|
"instrs_df = instrs_df[instrs_df['group'].notna()].copy()\n",
|
|
"\n",
|
|
"# ── Clean ────────────────────────────────────────────────────────────────────\n",
|
|
"# Fill NaN masks with '00' (means 'no modes accepted' — safe default).\n",
|
|
"instrs_df['addr_mask_1'] = instrs_df['addr_mask_1'].fillna('00').astype(str).str.strip()\n",
|
|
"instrs_df['addr_mask_2'] = instrs_df['addr_mask_2'].fillna('00').astype(str).str.strip()\n",
|
|
"instrs_df['type_mask'] = instrs_df['type_mask'].fillna('00').astype(str).str.strip()\n",
|
|
"instrs_df['params'] = instrs_df['params'].fillna(0).astype(int)\n",
|
|
"instrs_df['name'] = instrs_df['name'].fillna('').astype(str).str.strip()\n",
|
|
"instrs_df['group'] = instrs_df['group'].fillna('Unknown').astype(str).str.strip()\n",
|
|
"instrs_df['operation'] = instrs_df['operation'].fillna('').astype(str).str.strip()\n",
|
|
"\n",
|
|
"# ── Sanitize mnemonics ──────────────────────────────────────────────────────\n",
|
|
"# C++ identifiers cannot contain spaces. Replace spaces with underscores and\n",
|
|
"# convert to uppercase so 'Int 1 Slot' becomes 'INT_1_SLOT'.\n",
|
|
"instrs_df['mnemonic'] = (\n",
|
|
" instrs_df['mnemonic']\n",
|
|
" .astype(str)\n",
|
|
" .str.strip() # remove leading/trailing whitespace\n",
|
|
" .str.replace(' ', '_') # replace internal spaces with underscores\n",
|
|
" .str.upper() # uppercase for consistency\n",
|
|
")\n",
|
|
"\n",
|
|
"# ── Validate: duplicate mnemonics ────────────────────────────────────────────\n",
|
|
"# Duplicates in real instruction names would cause C++ compilation errors.\n",
|
|
"# We abort here rather than generating broken code.\n",
|
|
"mnemonic_counts = instrs_df['mnemonic'].value_counts()\n",
|
|
"duplicates = mnemonic_counts[mnemonic_counts > 1]\n",
|
|
"if not duplicates.empty:\n",
|
|
" # Show which mnemonics are duplicated before raising the error.\n",
|
|
" raise ValueError(f'Duplicate mnemonics found — fix the sheet before generating:\\n{duplicates}')\n",
|
|
"\n",
|
|
"print(f'Real instructions : {len(instrs_df)}')\n",
|
|
"print(f'Reserved slots : {len(reserved_df)}')\n",
|
|
"print(f'Duplicate check : PASSED')\n",
|
|
"print(f'\\nGroups found:')\n",
|
|
"print(instrs_df['group'].value_counts().to_string())\n",
|
|
"print(f'\\nFirst 5 instructions:')\n",
|
|
"print(instrs_df[['byte_code','mnemonic','group','params','addr_mask_1','type_mask']].head().to_string())\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "452bc76c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Masks written to: .//autogen/InstructionMasks.hpp\n",
|
|
"Lines generated : 272\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Export the addressing-mode and type-size mask tables to InstructionMasks.hpp\n",
|
|
"\n",
|
|
"\n",
|
|
"# ── Build the masks header content ──────────────────────────────────────────\n",
|
|
"lines = []\n",
|
|
"\n",
|
|
"# `#pragma once` include guard (non-standard but widely supported) — prevents the file from being included more than once.\n",
|
|
"lines.append('#pragma once')\n",
|
|
"lines.append('// AUTO-GENERATED by pygen.ipynb — DO NOT EDIT MANUALLY')\n",
|
|
"lines.append('#include <spider/runtime/common.hpp>')\n",
|
|
"lines.append('')\n",
|
|
"lines.append('namespace spider {')\n",
|
|
"lines.append('')\n",
|
|
"\n",
|
|
"# ── Addressing mode mask table ───────────────────────────────────────────────\n",
|
|
"# Each instruction has two masks (one per parameter).\n",
|
|
"# We write them as a constexpr array so the VM can look them up at runtime\n",
|
|
"# using the opcode as the index.\n",
|
|
"lines.append('// Addressing mode masks — indexed by opcode.')\n",
|
|
"lines.append('// [opcode][0] = mask for param 1, [opcode][1] = mask for param 2')\n",
|
|
"lines.append('constexpr u8 ADDR_MODE_MASKS[][2] = {')\n",
|
|
"\n",
|
|
"for _, row in instrs_df.iterrows():\n",
|
|
" # Convert the hex string mask to an integer for the C++ literal.\n",
|
|
" m1 = row['addr_mask_1'].replace('.0','').strip() # remove pandas float artefact\n",
|
|
" m2 = row['addr_mask_2'].replace('.0','').strip()\n",
|
|
" m1 = m1 if m1 != 'nan' else '00'\n",
|
|
" m2 = m2 if m2 != 'nan' else '00'\n",
|
|
" # Each row: { 0xMASK1, 0xMASK2 }, // MNEMONIC\n",
|
|
" lines.append(f' {{ 0x{m1.upper()}, 0x{m2.upper()} }}, // {row[\"mnemonic\"]}')\n",
|
|
"\n",
|
|
"lines.append('};')\n",
|
|
"lines.append('')\n",
|
|
"\n",
|
|
"# ── Type size mask table ─────────────────────────────────────────────────────\n",
|
|
"# A single byte per instruction encoding which type sizes it accepts.\n",
|
|
"lines.append('// Type size masks — indexed by opcode.')\n",
|
|
"lines.append('constexpr u8 TYPE_SIZE_MASKS[] = {')\n",
|
|
"\n",
|
|
"for _, row in instrs_df.iterrows():\n",
|
|
" tm = str(row['type_mask']).replace('.0','').strip()\n",
|
|
" tm = tm if tm != 'nan' else '00'\n",
|
|
" lines.append(f' 0x{tm.upper()}, // {row[\"mnemonic\"]}')\n",
|
|
"\n",
|
|
"lines.append('};')\n",
|
|
"lines.append('')\n",
|
|
"lines.append('} // namespace spider')\n",
|
|
"\n",
|
|
"# ── Write to file ────────────────────────────────────────────────────────────\n",
|
|
"masks_path = os.path.join(OUT_DIR, 'InstructionMasks.hpp')\n",
|
|
"with open(masks_path, 'w', encoding='utf-8') as f:\n",
|
|
" # Join with Unix line endings only — repo etiquette says no \\r\\n.\n",
|
|
" f.write('\\n'.join(lines))\n",
|
|
"\n",
|
|
"print(f'Masks written to: {masks_path}')\n",
|
|
"print(f'Lines generated : {len(lines)}')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "5aaebef0",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Instructions formatted: 128\n",
|
|
"\n",
|
|
"--- Preview (first 2 instructions) ---\n",
|
|
" // [System] 0x000 — NOP: No Operation\n",
|
|
" // Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n",
|
|
" // Operation: Nothing\n",
|
|
" void NOP();\n",
|
|
"\n",
|
|
" // [System] 0x001 — SPDR: Will place the Spider version of the interpreter in RA\n",
|
|
" // Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n",
|
|
" // Operation: (Spider Version) -> RA\n",
|
|
" void SPDR();\n",
|
|
"\n",
|
|
"\n",
|
|
"CPU.hpp updated successfully at: .//src//spider/runtime/cpu/CPU.hpp\n",
|
|
"Total lines in updated file: 883\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# print the CPU Instructions\n",
|
|
"\n",
|
|
"# ── Generate all instruction declarations ───────────────────────────────────\n",
|
|
"formatted = []\n",
|
|
"\n",
|
|
"for _, row in instrs_df.iterrows():\n",
|
|
" # Clean each field — remove pandas float artefacts like '00.0'\n",
|
|
" byte_code = str(row['byte_code']).strip()\n",
|
|
" mnemonic = str(row['mnemonic']).strip()\n",
|
|
" name = str(row['name']).strip()\n",
|
|
" group = str(row['group']).strip()\n",
|
|
" params = int(row['params'])\n",
|
|
" addr_mask_1 = str(row['addr_mask_1']).replace('.0', '').strip()\n",
|
|
" addr_mask_2 = str(row['addr_mask_2']).replace('.0', '').strip()\n",
|
|
" type_mask = str(row['type_mask']).replace('.0', '').strip()\n",
|
|
" operation = str(row['operation']).strip()\n",
|
|
"\n",
|
|
" # Call the C++ printer from Cell 2 to format this instruction.\n",
|
|
" formatted.append(format_instruction(\n",
|
|
" byte_code, mnemonic, name, group,\n",
|
|
" params, addr_mask_1, addr_mask_2,\n",
|
|
" type_mask, operation\n",
|
|
" ))\n",
|
|
"\n",
|
|
"# Combine all declarations into one block string.\n",
|
|
"generated_block = format_block(formatted)\n",
|
|
"\n",
|
|
"print(f'Instructions formatted: {len(formatted)}')\n",
|
|
"print('\\n--- Preview (first 2 instructions) ---')\n",
|
|
"print('\\n'.join(formatted[:2]))\n",
|
|
"\n",
|
|
"# ── Inject into CPU.hpp ──────────────────────────────────────────────────────\n",
|
|
"# The markers tell us exactly where to insert the generated block.\n",
|
|
"MARKER_OPEN = '// <pygen-target name=cpu-instructions> //'\n",
|
|
"MARKER_CLOSE = '// </pygen-target> //'\n",
|
|
"\n",
|
|
"# Read the current CPU.hpp content.\n",
|
|
"with open(CPU_HPP_PATH, 'r', encoding='utf-8') as f:\n",
|
|
" original = f.read()\n",
|
|
"\n",
|
|
"# Verify both markers exist before modifying anything.\n",
|
|
"# If either is missing, the file was edited by hand — abort to avoid corruption.\n",
|
|
"if MARKER_OPEN not in original:\n",
|
|
" raise ValueError(f'Open marker not found in CPU.hpp: {MARKER_OPEN}')\n",
|
|
"if MARKER_CLOSE not in original:\n",
|
|
" raise ValueError(f'Close marker not found in CPU.hpp: {MARKER_CLOSE}')\n",
|
|
"\n",
|
|
"# Split the file into 3 parts around the pygen-target markers.\n",
|
|
"# before : everything up to and including the open marker\n",
|
|
"# after : from the close marker onward (including it)\n",
|
|
"before = original[:original.index(MARKER_OPEN) + len(MARKER_OPEN)]\n",
|
|
"after = original[original.index(MARKER_CLOSE):]\n",
|
|
"\n",
|
|
"# Reassemble: keep before, inject the generated block, then restore after.\n",
|
|
"updated = before + '\\n' + generated_block + '\\n' + INDENT + after\n",
|
|
"\n",
|
|
"# Write back using UTF-8 and Unix line endings only (repo etiquette: no \\r\\n).\n",
|
|
"with open(CPU_HPP_PATH, 'w', encoding='utf-8', newline='\\n') as f:\n",
|
|
" f.write(updated)\n",
|
|
"\n",
|
|
"print(f'\\nCPU.hpp updated successfully at: {CPU_HPP_PATH}')\n",
|
|
"print(f'Total lines in updated file: {len(updated.splitlines())}')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "instrmap_gen",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"InstrMap.cpp written to: .//src//spider/runtime/instr/InstrMap.cpp\n",
|
|
" Size : 34,157 bytes\n",
|
|
" Array entries : 512 (128 populated, 384 nullptr)\n",
|
|
" Switch cases : 128\n",
|
|
" Line endings : LF-only verified\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# ── Generate InstrMap.cpp ────────────────────────────────────────────────────\n",
|
|
"# Produces two dispatch implementations in one file:\n",
|
|
"# 1. CPU::Fn CPU::instrMap[] — array of member-function pointers\n",
|
|
"# 2. void CPU::executeSwLk() — switch/case version\n",
|
|
"#\n",
|
|
"# Both use UPPERCASE method names matching the mnemonic column.\n",
|
|
"\n",
|
|
"TABLE_SIZE = 512 # 9-bit opcode space\n",
|
|
"\n",
|
|
"# Build opcode -> mnemonic lookup from the cleaned instruction DataFrame.\n",
|
|
"opcode_to_mnem: dict[int, str] = {}\n",
|
|
"opcode_to_name: dict[int, str] = {}\n",
|
|
"opcode_to_group: dict[int, str] = {}\n",
|
|
"\n",
|
|
"for _, row in instrs_df.iterrows():\n",
|
|
" bc = str(row['byte_code']).strip()\n",
|
|
" opc = int(bc, 16)\n",
|
|
" opcode_to_mnem[opc] = str(row['mnemonic']).strip()\n",
|
|
" opcode_to_name[opc] = str(row['name']).strip()\n",
|
|
" opcode_to_group[opc] = str(row['group']).strip()\n",
|
|
"\n",
|
|
"# Also track reserved slots for annotation.\n",
|
|
"reserved_opcodes: set[int] = set()\n",
|
|
"for _, row in reserved_df.iterrows():\n",
|
|
" bc = str(row['byte_code']).strip()\n",
|
|
" if bc and bc != 'nan':\n",
|
|
" reserved_opcodes.add(int(bc, 16))\n",
|
|
"\n",
|
|
"# ── Assemble the file ───────────────────────────────────────────────────────\n",
|
|
"L = []\n",
|
|
"L.append('/**')\n",
|
|
"L.append(' * @file InstrMap.cpp')\n",
|
|
"L.append(' * @brief Spider VM instruction dispatch — array and switch implementations.')\n",
|
|
"L.append(' *')\n",
|
|
"L.append(' * AUTO-GENERATED by pygen.ipynb — DO NOT EDIT BY HAND.')\n",
|
|
"L.append(' *')\n",
|
|
"L.append(' * This file provides two equivalent dispatch mechanisms:')\n",
|
|
"L.append(' *')\n",
|
|
"L.append(' * 1. InstrMap[] — A lookup table of member-function pointers indexed by')\n",
|
|
"L.append(' * opcode. O(1) dispatch; suitable for platforms where')\n",
|
|
"L.append(' * indirect calls through function pointers are efficient.')\n",
|
|
"L.append(' *')\n",
|
|
"L.append(' * 2. CPU::execute(u16) — A switch/case over every opcode. Lets the')\n",
|
|
"L.append(' * compiler emit a jump table or branch tree; may be')\n",
|
|
"L.append(' * preferable on microcontrollers or when link-time')\n",
|
|
"L.append(' * optimisation can inline the handlers.')\n",
|
|
"L.append(' *')\n",
|
|
"L.append(' */')\n",
|
|
"L.append('') # [CHANGE] Use absolute path to make paths more explicit\n",
|
|
"L.append('#include <spider/runtime/cpu/CPU.hpp>')\n",
|
|
"L.append('')\n",
|
|
"L.append('namespace spider {')\n",
|
|
"L.append('')\n",
|
|
"\n",
|
|
"# ── Version 1: Array: ────────────────────────────────────────────────────────\n",
|
|
"L.append('// =============================================================')\n",
|
|
"L.append('// Version 1 — Lookup table of member-function pointers')\n",
|
|
"L.append('// =============================================================')\n",
|
|
"L.append('')\n",
|
|
"# [CHANGE] Use CPU::Fn Instead\n",
|
|
"#L.append('/** Pointer-to-member type for a zero-argument CPU instruction. */')\n",
|
|
"#L.append('using CPUInstr = void (CPU::*)();')\n",
|
|
"L.append('')\n",
|
|
"L.append('/**')\n",
|
|
"L.append(f' * Instruction dispatch table ({TABLE_SIZE} entries, 9-bit opcode space).')\n",
|
|
"L.append(' *')\n",
|
|
"L.append(' * Usage:')\n",
|
|
"L.append(' * u16 opcode = fetch();')\n",
|
|
"L.append(' * CPU::Fn fn = InstrMap[opcode];')\n",
|
|
"L.append(' * if (fn) (cpu.*fn)();')\n",
|
|
"L.append(' */') # [CHANGE] Made it part of the CPU & avoided explicit size.\n",
|
|
"L.append(f'CPU::Fn CPU::instrMap[] = {{')\n",
|
|
"\n",
|
|
"for opc in range(TABLE_SIZE):\n",
|
|
" mnem = opcode_to_mnem.get(opc)\n",
|
|
" if mnem:\n",
|
|
" name = opcode_to_name[opc]\n",
|
|
" L.append(f' &CPU::{mnem + \",\":<28s}// 0x{opc:03X} — {name}')\n",
|
|
" else:\n",
|
|
" tag = ''\n",
|
|
" if opc in reserved_opcodes:\n",
|
|
" tag = ' (reserved)'\n",
|
|
" L.append(f' {\"nullptr,\":<28s}// 0x{opc:03X}{tag}')\n",
|
|
"\n",
|
|
"L.append('};')\n",
|
|
"L.append('')\n",
|
|
"L.append('')\n",
|
|
"\n",
|
|
"# ── Version 2: Switch ──────────────────────────────────────────────────────\n",
|
|
"L.append('// =============================================================')\n",
|
|
"L.append('// Version 2 — Switch dispatch')\n",
|
|
"L.append('// =============================================================')\n",
|
|
"L.append('')\n",
|
|
"L.append('/**')\n",
|
|
"L.append(' * Execute the instruction identified by @p opcode.')\n",
|
|
"L.append(' *')\n",
|
|
"L.append(' * This is functionally equivalent to the InstrMap[] table above')\n",
|
|
"L.append(' * but expressed as a switch so the compiler can choose the best')\n",
|
|
"L.append(' * lowering strategy (jump table, binary search, etc.).')\n",
|
|
"L.append(' *')\n",
|
|
"L.append(' * @param opcode 9-bit instruction opcode (0x000 - 0x1FF).')\n",
|
|
"L.append(' */')\n",
|
|
"L.append('void CPU::executeSwLk() {')\n",
|
|
"L.append(' switch (_opcode) {')\n",
|
|
"\n",
|
|
"last_group = None\n",
|
|
"for opc in sorted(opcode_to_mnem.keys()):\n",
|
|
" mnem = opcode_to_mnem[opc]\n",
|
|
" group = opcode_to_group[opc]\n",
|
|
" if group != last_group:\n",
|
|
" L.append('')\n",
|
|
" L.append(f' // ── {group} ' + '─' * max(1, 44 - len(group)))\n",
|
|
" last_group = group\n",
|
|
" L.append(f' case 0x{opc:03X}: {mnem}(); break;')\n",
|
|
"\n",
|
|
"L.append('')\n",
|
|
"L.append(' default:')\n",
|
|
"L.append(' break;')\n",
|
|
"L.append(' }')\n",
|
|
"L.append('}')\n",
|
|
"L.append('')\n",
|
|
"L.append('} // namespace spider')\n",
|
|
"L.append('')\n",
|
|
"\n",
|
|
"INSTRMAP_SRC = '\\n'.join(L)\n",
|
|
"\n",
|
|
"# ── Write to file ───────────────────────────────────────────────────────────\n",
|
|
"# [CHANGE] Write this in the instructions folder to avoid CPU file bloat\n",
|
|
"INSTRMAP_PATH = f'{SRC_ROOT}/spider/runtime/instr/InstrMap.cpp'\n",
|
|
"\n",
|
|
"with open(INSTRMAP_PATH, 'wb') as f:\n",
|
|
" f.write(INSTRMAP_SRC.encode('utf-8'))\n",
|
|
"\n",
|
|
"# Verify LF-only\n",
|
|
"with open(INSTRMAP_PATH, 'rb') as f:\n",
|
|
" raw_bytes = f.read()\n",
|
|
"assert b'\\r' not in raw_bytes, 'CRLF detected in InstrMap.cpp!'\n",
|
|
"\n",
|
|
"array_count = INSTRMAP_SRC.count('&CPU::')\n",
|
|
"switch_count = INSTRMAP_SRC.count('case 0x')\n",
|
|
"\n",
|
|
"print(f'InstrMap.cpp written to: {INSTRMAP_PATH}')\n",
|
|
"print(f' Size : {len(raw_bytes):,} bytes')\n",
|
|
"print(f' Array entries : {TABLE_SIZE} ({array_count} populated, {TABLE_SIZE - array_count} nullptr)')\n",
|
|
"print(f' Switch cases : {switch_count}')\n",
|
|
"print(f' Line endings : LF-only verified')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "9f190f4c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# ---------- GENERATE INSTR_XX FILES ----------\n",
|
|
"INSTR_DIR = f'{SRC_ROOT}/spider/runtime/instr'\n",
|
|
"\n",
|
|
"# Generate the files\n",
|
|
"# Each file goes from 00 to 1F (0-31), 32 instr / file\n",
|
|
"for x0 in range(0, TABLE_SIZE, 32):\n",
|
|
" x1 = x0 + 31\n",
|
|
" fname = f'{INSTR_DIR}/Instr_{x0:03X}-{x1:03X}.cpp'\n",
|
|
"\n",
|
|
" # TODO: Check if there are missing instructions\n",
|
|
" if file_exists(fname):\n",
|
|
" continue\n",
|
|
"\n",
|
|
" # CREATE FILE FROM SCRATCH\n",
|
|
" L = []\n",
|
|
" L.append('/**')\n",
|
|
" L.append(' * @brief AUTO-GENERATED by pygen.ipynb BUT editable by hand!')\n",
|
|
" L.append(' *')\n",
|
|
" L.append(' */')\n",
|
|
" L.append('') # [CHANGE] Use absolute path to make paths more explicit\n",
|
|
" L.append('#include <spider/runtime/cpu/CPU.hpp>')\n",
|
|
" L.append('')\n",
|
|
" L.append('namespace spider {')\n",
|
|
" L.append('')\n",
|
|
"\n",
|
|
" for opc in range(x0, x1 + 1):\n",
|
|
" mnem = opcode_to_mnem.get(opc)\n",
|
|
" if mnem is None: continue\n",
|
|
"\n",
|
|
" L.append(f' void CPU::{mnem}() {{')\n",
|
|
" L.append(f' // TODO: Implement {mnem}')\n",
|
|
" L.append(f' }}')\n",
|
|
" L.append('')\n",
|
|
"\n",
|
|
" L.append('}')\n",
|
|
" L.append('')\n",
|
|
"\n",
|
|
" with open(fname, 'wb') as f:\n",
|
|
" f.write('\\n'.join(L).encode('utf-8'))\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.14.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|