Merge pull request 'arturo-pygen-branch' (#1) from arturo-pygen-branch into main

Reviewed-on: #1
This commit was merged in pull request #1.
This commit is contained in:
2026-03-20 16:49:37 +00:00
2 changed files with 389 additions and 14 deletions

View File

@@ -8,5 +8,6 @@ This is the Spider runtime (aka, the virtual machine) that executes the Spider b
- Do not modify the autogenerated files.
- If using an LLM, use private mode and tell it you're working on an old modem.
- If using an AI agent, don't.
- Re-run pygen.ipynb whenever the .xlsx instructions file changes, so the generated code stays up to date.
Failure to uphold the code etiquette will result in a slap on the wrist, with a hammer.

View File

@@ -18,9 +18,43 @@
"execution_count": null,
"id": "b0fcd533",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Repo root : /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime\n",
"CPU.hpp : /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime/src/spider/runtime/cpu/CPU.hpp\n",
"XLSX : /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime/Spider_Instructions.xlsx\n",
"Output dir: /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime/pygen_out\n"
]
}
],
"source": [
"# setup directories"
"# setup directories\n",
"\n",
"import os\n",
"\n",
"# Root of the Spider runtime repo — adjust this path to match your machine (folder where spider-runtime lives).\n",
"REPO_ROOT = os.path.abspath('/home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime')\n",
"\n",
"# Where CPU.hpp lives — this is the file we will inject generated code into.\n",
"CPU_HPP_PATH = os.path.join(REPO_ROOT, 'src', 'spider', 'runtime', 'cpu', 'CPU.hpp')\n",
"\n",
"# Where the Excel instruction sheet lives. Allocate the .xlsx file in the project's root folder.\n",
"XLSX_PATH = os.path.join(REPO_ROOT, 'Spider_Instructions.xlsx')\n",
"\n",
"# Output folder for any standalone generated files.\n",
"OUT_DIR = os.path.join(REPO_ROOT, 'pygen_out')\n",
"\n",
"# Create the output directory if it does not exist yet.\n",
"# exist_ok=True means no error if it already exists.\n",
"os.makedirs(OUT_DIR, exist_ok=True)\n",
"\n",
"print(f'Repo root : {REPO_ROOT}')\n",
"print(f'CPU.hpp : {CPU_HPP_PATH}')\n",
"print(f'XLSX : {XLSX_PATH}')\n",
"print(f'Output dir: {OUT_DIR}')\n"
]
},
{
@@ -28,9 +62,75 @@
"execution_count": null,
"id": "b33de8ac",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- Sample output for NOP ---\n",
" // [System] 0x000 — NOP: No Operation\n",
" // Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n",
" // Operation: Nothing\n",
" void NOP();\n",
"\n"
]
}
],
"source": [
"# Implement here some kind of \"C++\" printer"
"# Implement here some kind of \"C++\" printer\n",
"\n",
"# ── Indent used throughout the generated block ──────────────────────────────\n",
"INDENT = ' ' # 8 spaces — matches the indentation inside CPU.hpp\n",
"\n",
"def format_instruction(byte_code: str, mnemonic: str, name: str,\n",
" group: str, params: int,\n",
" addr_mask_1: str, addr_mask_2: str,\n",
" type_mask: str, operation: str) -> str:\n",
" \"\"\"\n",
" Returns a single C++ instruction declaration as a string.\n",
"\n",
" Each instruction becomes a commented constant inside the CPU class.\n",
" Format:\n",
" // [GROUP] 0xBYTE — MNEMONIC: Name\n",
" // Params: N | AddrMask1: XX AddrMask2: XX | TypeMask: XX\n",
" // Operation: ...\n",
" MNEMONIC\n",
" \"\"\"\n",
" lines = []\n",
"\n",
" # Header comment: group, opcode, mnemonic and human-readable name.\n",
" lines.append(f'{INDENT}// [{group}] 0x{byte_code} — {mnemonic}: {name}')\n",
"\n",
" # Second comment line: parameter count, addressing masks, type size mask.\n",
" lines.append(f'{INDENT}// Params: {params} | '\n",
" f'AddrMask1: {addr_mask_1} AddrMask2: {addr_mask_2} | '\n",
" f'TypeMask: {type_mask}')\n",
"\n",
" # Third comment line: what this instruction actually does.\n",
" lines.append(f'{INDENT}// Operation: {operation}')\n",
"\n",
" # The declaration itself — just the mnemonic name, matching NOP/SPDR style.\n",
" lines.append(f'{INDENT}void {mnemonic}();') # method declaration inside CPU class # enum value: NAME = 0xOPCODE,\n",
"\n",
" # Empty line between instructions for readability.\n",
" lines.append('')\n",
"\n",
" return '\\n'.join(lines)\n",
"\n",
"\n",
"def format_block(instructions: list) -> str:\n",
" \"\"\"\n",
" Joins all individual instruction strings into one complete block.\n",
" This is the text that will be injected between the pygen-target markers.\n",
" \"\"\"\n",
" # Join every formatted instruction into one big string.\n",
" return '\\n'.join(instructions)\n",
"\n",
"\n",
"# Print what one instruction looks like.\n",
"sample = format_instruction('000','NOP','No Operation','System',0,'00','00','00','Nothing')\n",
"print('--- Sample output for NOP ---')\n",
"print(sample)\n"
]
},
{
@@ -38,35 +138,309 @@
"execution_count": null,
"id": "58645013",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Real instructions : 126\n",
"Reserved slots : 14\n",
"Duplicate check : PASSED\n",
"\n",
"Groups found:\n",
"group\n",
"Integer 19\n",
"System 15\n",
"Bit Wise 14\n",
"Boolean 12\n",
"Branch 12\n",
"Floating Point 10\n",
"Casts 10\n",
"Memory 9\n",
"Trigonometric 7\n",
"Exponential 6\n",
"Matrix 6\n",
"SIMD 5\n",
"Easter Eggs 1\n",
"\n",
"First 5 instructions:\n",
" byte_code mnemonic group params addr_mask_1 type_mask\n",
"0 000 NOP System 0 00 00\n",
"1 001 SPDR System 0 00 00\n",
"2 002 MMODE System 1 05 01\n",
"3 003 INT System 1 1F 0F\n",
"4 004 LRV System 1 1F 0C\n"
]
}
],
"source": [
"# read the instruction sheet with pandas"
"# read the instruction sheet with pandas\n",
"\n",
"\n",
"import pandas as pd\n",
"\n",
"# -- Load --------------------------------------------------------------------\n",
"# The data is on the 'Instructions' sheet. Header is on row index 6 (0-based),\n",
"# so we skip the first 6 rows of decorative merged cells.\n",
"raw = pd.read_excel(XLSX_PATH, sheet_name='Instructions', header=6)\n",
"\n",
"# Rename the two unnamed columns that hold the two addressing mode masks.\n",
"# In the sheet they appear after 'Acc. Addr. Mode Mask' with no header label.\n",
"raw.columns = [\n",
" 'skip_0', # empty column A\n",
" 'skip_1', # 'Base Instr.' label column\n",
" 'byte_code', # opcode hex string e.g. '000'\n",
" 'mnemonic', # short name e.g. 'NOP'\n",
" 'name', # full name e.g. 'No Operation'\n",
" 'group', # category e.g. 'System'\n",
" 'params', # number of parameters (0, 1, or 2)\n",
" 'imp', # addressing mode: Implied\n",
" 'imm', # addressing mode: Immediate\n",
" 'abs', # addressing mode: Absolute\n",
" 'reg', # addressing mode: Register\n",
" 'ind', # addressing mode: Indirect\n",
" 'ptr', # addressing mode: Pointer\n",
" 'idx', # addressing mode: Indexed\n",
" 'sca', # addressing mode: Scaled\n",
" 'dis', # addressing mode: Displaced\n",
" 'addr_mask_1', # accepted addressing mode mask for param 1\n",
" 'addr_mask_2', # accepted addressing mode mask for param 2\n",
" 'B', # type size: Byte (1 byte) supported?\n",
" 'S', # type size: Short (2 bytes) supported?\n",
" 'I', # type size: Int (4 bytes) supported?\n",
" 'L', # type size: Long (8 bytes) supported?\n",
" 'F', # type size: Float supported?\n",
" 'D', # type size: Double supported?\n",
" 'type_mask', # combined type size mask as hex string\n",
" 'operation', # human-readable description of what the instruction does\n",
" 'skip_2', # trailing empty column\n",
"]\n",
"\n",
"# ── Filter ───────────────────────────────────────────────────────────────────\n",
"# Keep only rows that have a byte_code value (drops empty rows at the bottom).\n",
"df = raw[raw['byte_code'].notna()].copy()\n",
"\n",
"# Separate reserved slots from real instructions.\n",
"# Reserved entries have '(reserved)' in the mnemonic column.\n",
"is_reserved = df['mnemonic'].astype(str).str.contains('reserved', case=False, na=False)\n",
"reserved_df = df[is_reserved].copy() # keep for reference\n",
"instrs_df = df[~is_reserved & df['mnemonic'].notna()].copy() # real instructions only\n",
"\n",
"# Skip incomplete entries — rows with no group are placeholder slots (e.g. Int 1-6 Slot)\n",
"# that have no defined behaviour yet. Keeping them would generate invalid C++ identifiers.\n",
"instrs_df = instrs_df[instrs_df['group'].notna()].copy()\n",
"\n",
"# ── Clean ────────────────────────────────────────────────────────────────────\n",
"# Fill NaN masks with '00' (means 'no modes accepted' — safe default).\n",
"instrs_df['addr_mask_1'] = instrs_df['addr_mask_1'].fillna('00').astype(str).str.strip()\n",
"instrs_df['addr_mask_2'] = instrs_df['addr_mask_2'].fillna('00').astype(str).str.strip()\n",
"instrs_df['type_mask'] = instrs_df['type_mask'].fillna('00').astype(str).str.strip()\n",
"instrs_df['params'] = instrs_df['params'].fillna(0).astype(int)\n",
"instrs_df['name'] = instrs_df['name'].fillna('').astype(str).str.strip()\n",
"instrs_df['group'] = instrs_df['group'].fillna('Unknown').astype(str).str.strip()\n",
"instrs_df['operation'] = instrs_df['operation'].fillna('').astype(str).str.strip()\n",
"\n",
"# ── Sanitize mnemonics ──────────────────────────────────────────────────────\n",
"# C++ identifiers cannot contain spaces. Replace spaces with underscores and\n",
"# convert to uppercase so 'Int 1 Slot' becomes 'INT_1_SLOT'.\n",
"instrs_df['mnemonic'] = (\n",
" instrs_df['mnemonic']\n",
" .astype(str)\n",
" .str.strip() # remove leading/trailing whitespace\n",
" .str.replace(' ', '_') # replace internal spaces with underscores\n",
" .str.upper() # uppercase for consistency\n",
")\n",
"\n",
"# ── Validate: duplicate mnemonics ────────────────────────────────────────────\n",
"# Duplicates in real instruction names would cause C++ compilation errors.\n",
"# We abort here rather than generating broken code.\n",
"mnemonic_counts = instrs_df['mnemonic'].value_counts()\n",
"duplicates = mnemonic_counts[mnemonic_counts > 1]\n",
"if not duplicates.empty:\n",
" # Show which mnemonics are duplicated before raising the error.\n",
" raise ValueError(f'Duplicate mnemonics found — fix the sheet before generating:\\n{duplicates}')\n",
"\n",
"print(f'Real instructions : {len(instrs_df)}')\n",
"print(f'Reserved slots : {len(reserved_df)}')\n",
"print(f'Duplicate check : PASSED')\n",
"print(f'\\nGroups found:')\n",
"print(instrs_df['group'].value_counts().to_string())\n",
"print(f'\\nFirst 5 instructions:')\n",
"print(instrs_df[['byte_code','mnemonic','group','params','addr_mask_1','type_mask']].head().to_string())\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "452bc76c",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Masks written to: /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime/pygen_out/InstructionMasks.hpp\n",
"Lines generated : 268\n"
]
}
],
"source": [
"# well, then export the masks (TODO)"
"# well, then export the masks (TODO)\n",
"\n",
"\n",
"# ── Build the masks header content ──────────────────────────────────────────\n",
"lines = []\n",
"\n",
"# Standard C++ header guard — prevents the file from being included more than once.\n",
"lines.append('#pragma once')\n",
"lines.append('// AUTO-GENERATED by pygen.ipynb — DO NOT EDIT MANUALLY')\n",
"lines.append('#include <spider/runtime/common.hpp>')\n",
"lines.append('')\n",
"lines.append('namespace spider {')\n",
"lines.append('')\n",
"\n",
"# ── Addressing mode mask table ───────────────────────────────────────────────\n",
"# Each instruction has two masks (one per parameter).\n",
"# We write them as a constexpr array so the VM can look them up at runtime\n",
"# using the opcode as the index.\n",
"lines.append('// Addressing mode masks — indexed by opcode.')\n",
"lines.append('// [opcode][0] = mask for param 1, [opcode][1] = mask for param 2')\n",
"lines.append('constexpr u8 ADDR_MODE_MASKS[][2] = {')\n",
"\n",
"for _, row in instrs_df.iterrows():\n",
" # Convert the hex string mask to an integer for the C++ literal.\n",
" m1 = row['addr_mask_1'].replace('.0','').strip() # remove pandas float artefact\n",
" m2 = row['addr_mask_2'].replace('.0','').strip()\n",
" m1 = m1 if m1 != 'nan' else '00'\n",
" m2 = m2 if m2 != 'nan' else '00'\n",
" # Each row: { 0xMASK1, 0xMASK2 }, // MNEMONIC\n",
" lines.append(f' {{ 0x{m1.upper()}, 0x{m2.upper()} }}, // {row[\"mnemonic\"]}')\n",
"\n",
"lines.append('};')\n",
"lines.append('')\n",
"\n",
"# ── Type size mask table ─────────────────────────────────────────────────────\n",
"# A single byte per instruction encoding which type sizes it accepts.\n",
"lines.append('// Type size masks — indexed by opcode.')\n",
"lines.append('constexpr u8 TYPE_SIZE_MASKS[] = {')\n",
"\n",
"for _, row in instrs_df.iterrows():\n",
" tm = str(row['type_mask']).replace('.0','').strip()\n",
" tm = tm if tm != 'nan' else '00'\n",
" lines.append(f' 0x{tm.upper()}, // {row[\"mnemonic\"]}')\n",
"\n",
"lines.append('};')\n",
"lines.append('')\n",
"lines.append('} // namespace spider')\n",
"\n",
"# ── Write to file ────────────────────────────────────────────────────────────\n",
"masks_path = os.path.join(OUT_DIR, 'InstructionMasks.hpp')\n",
"with open(masks_path, 'w', encoding='utf-8') as f:\n",
" # Join with Unix line endings only — repo etiquette says no \\r\\n.\n",
" f.write('\\n'.join(lines))\n",
"\n",
"print(f'Masks written to: {masks_path}')\n",
"print(f'Lines generated : {len(lines)}')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "5aaebef0",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Instructions formatted: 126\n",
"\n",
"--- Preview (first 2 instructions) ---\n",
" // [System] 0x000 — NOP: No Operation\n",
" // Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n",
" // Operation: Nothing\n",
" void NOP();\n",
"\n",
" // [System] 0x001 — SPDR: Will place the Spider version of the interpreter in RA\n",
" // Params: 0 | AddrMask1: 00 AddrMask2: 00 | TypeMask: 00\n",
" // Operation: (Spider Version) -> RA\n",
" void SPDR();\n",
"\n",
"\n",
"CPU.hpp updated successfully at: /home/arturobalam/Documents/7thQuarter/Estancia_2/internship-repo/ArturoBalam-Internship2-repo/spider-runtime-folder/spider-runtime/src/spider/runtime/cpu/CPU.hpp\n",
"Total lines in updated file: 674\n"
]
}
],
"source": [
"# print the CPU Instructions"
"# print the CPU Instructions\n",
"\n",
"# ── Generate all instruction declarations ───────────────────────────────────\n",
"formatted = []\n",
"\n",
"for _, row in instrs_df.iterrows():\n",
" # Clean each field — remove pandas float artefacts like '00.0'\n",
" byte_code = str(row['byte_code']).strip()\n",
" mnemonic = str(row['mnemonic']).strip()\n",
" name = str(row['name']).strip()\n",
" group = str(row['group']).strip()\n",
" params = int(row['params'])\n",
" addr_mask_1 = str(row['addr_mask_1']).replace('.0', '').strip()\n",
" addr_mask_2 = str(row['addr_mask_2']).replace('.0', '').strip()\n",
" type_mask = str(row['type_mask']).replace('.0', '').strip()\n",
" operation = str(row['operation']).strip()\n",
"\n",
" # Call the C++ printer from Cell 2 to format this instruction.\n",
" formatted.append(format_instruction(\n",
" byte_code, mnemonic, name, group,\n",
" params, addr_mask_1, addr_mask_2,\n",
" type_mask, operation\n",
" ))\n",
"\n",
"# Combine all declarations into one block string.\n",
"generated_block = format_block(formatted)\n",
"\n",
"print(f'Instructions formatted: {len(formatted)}')\n",
"print('\\n--- Preview (first 2 instructions) ---')\n",
"print('\\n'.join(formatted[:2]))\n",
"\n",
"# ── Inject into CPU.hpp ──────────────────────────────────────────────────────\n",
"# The markers tell us exactly where to insert the generated block.\n",
"MARKER_OPEN = '// <pygen-target name=cpu-instructions> //'\n",
"MARKER_CLOSE = '// </pygen-target> //'\n",
"\n",
"# Read the current CPU.hpp content.\n",
"with open(CPU_HPP_PATH, 'r', encoding='utf-8') as f:\n",
" original = f.read()\n",
"\n",
"# Verify both markers exist before modifying anything.\n",
"# If either is missing, the file was edited by hand — abort to avoid corruption.\n",
"if MARKER_OPEN not in original:\n",
" raise ValueError(f'Open marker not found in CPU.hpp: {MARKER_OPEN}')\n",
"if MARKER_CLOSE not in original:\n",
" raise ValueError(f'Close marker not found in CPU.hpp: {MARKER_CLOSE}')\n",
"\n",
"# Split the file into 3 parts around the pygen-target markers.\n",
"# before : everything up to and including the open marker\n",
"# after : from the close marker onward (including it)\n",
"before = original[:original.index(MARKER_OPEN) + len(MARKER_OPEN)]\n",
"after = original[original.index(MARKER_CLOSE):]\n",
"\n",
"# Reassemble: keep before, inject the generated block, then restore after.\n",
"updated = before + '\\n' + generated_block + '\\n' + INDENT + after\n",
"\n",
"# Write back using UTF-8 and Unix line endings only (repo etiquette: no \\r\\n).\n",
"with open(CPU_HPP_PATH, 'w', encoding='utf-8', newline='\\n') as f:\n",
" f.write(updated)\n",
"\n",
"print(f'\\nCPU.hpp updated successfully at: {CPU_HPP_PATH}')\n",
"print(f'Total lines in updated file: {len(updated.splitlines())}')\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "spider-rntm-env",
"language": "python",
"name": "python3"
},
@@ -80,7 +454,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
"version": "3.12.3"
}
},
"nbformat": 4,