2 Commits

9 changed files with 255 additions and 47 deletions

View File

@@ -15,7 +15,7 @@ OBJEXT := o
#Flags, Libraries and Includes
ROOT := ./
CFLAGS := -std=c++20 -O2 \
-Wall -Wextra \
-Wall -Werror -Wextra \
-Wshadow -Wnon-virtual-dtor -Wold-style-cast -Wcast-align \
-Wunused -Woverloaded-virtual -Wconversion \
-Wsign-conversion -Wnull-dereference -Wdouble-promotion \

View File

@@ -67,9 +67,10 @@ namespace spider {
void CPU::fetchInstr() {
u16 i = _reel->readU16(RI);
_opcode = (i >> 7) & 0x1FF;
_addrm = (i >> 2) & 0x1F;
_size = i & 0x3;
const u16 oc = (i >> 7);
_opcode = oc & 0x1FF; // GCC WHY!
_addrm = static_cast<u8>((i >> 2) & 0x1F);
_size = static_cast<u8>(i & 0x3);
RI += 2;
}
@@ -90,7 +91,7 @@ namespace spider {
(this->*(CPU::addrModes[_addrm]))();
// modify the _addrm register
_addrm >>= 3;
_addrm = static_cast<u8>((_addrm >> 3) & 0x1F);
_addrm++;
}
@@ -138,7 +139,13 @@ namespace spider {
void CPU::reg() { // NOT FINISHED
// Two consecutive registers can be declared
// Shift if the top part will become .reg too
u8 sh = (_addrm & 0b11000 == 0b11000) * 4;
u8 sh = ((_addrm & 0b11000) == 0b11000) * 4;
u8 use = 1 - (sh >> 2); // (sh / 4)
// get byte
u8 reg = (_reel->readU8(RI) >> sh) & 0xF;
_alu = &GPR[reg];
RI += use;
// store no-op
_post = &CPU::imp;

View File

@@ -39,7 +39,7 @@ namespace spider {
;
}
void drawCPUTempl(Terminal& t, CPU& cpu) {
void drawCPUTempl(Terminal& t) {
i32 r = 8, c = 1;
i32 w = 35, h = 31;
t.drawBox(r, c, w, h, "CPU");
@@ -171,10 +171,10 @@ namespace spider {
t.flush();
}
i32 addressWidth(isize ramSize) {
u32 addressWidth(isize ramSize) {
if (ramSize == 0) return 1;
isize maxAddr = ramSize - 1;
i32 digits = 0;
u32 digits = 0;
// Shift by increments of 4 (one hex nibble)
// We use a do-while to ensure at least 1 digit is returned for small RAMs
do {
@@ -193,13 +193,13 @@ namespace spider {
* @param progress The current progress
* @param total The total
*/
void drawScrollThumb(Terminal& term, i32 x, i32 y, i32 trackHeight, isize progress, isize total) {
void drawScrollThumb(Terminal& term, u32 x, u32 y, u32 trackHeight, isize progress, isize total) {
if (total == 0 || trackHeight <= 0) return;
// 1. Draw the background track (Light Shade: ░)
term.style(Terminal::FG_B_BLACK); // Dim the track
for (int i = 0; i < trackHeight; ++i) {
term.move(y + i, x).print("");
for (u32 i = 0; i < trackHeight; ++i) {
term.move(i32(y + i), i32(x)).print("");
}
// 2. Calculate Thumb Position
@@ -210,10 +210,10 @@ namespace spider {
f64 ratio = f64(progress) / f64(total);
// Map to track coordinates
i32 thumbOffset = i32(ratio * (trackHeight - 1));
u32 thumbOffset = u32(ratio * (trackHeight - 1));
// 3. Draw the Thumb (Full Block: █)
term.move(y + thumbOffset, x);
term.move(i32(y + thumbOffset), i32(x));
term.style(Terminal::FG_WHITE).print("");
term.style(Terminal::RESET);
}
@@ -230,30 +230,30 @@ namespace spider {
*/
void drawRAM(Terminal& term, RAM& ram, u64 scrollPos) {
// 1. Draw the container box
i32 y = 3;
i32 height = 36;
u32 y = 3;
u32 height = 36;
// 2. Configuration for the hex layout
int addrWidth = addressWidth(ram.size());
int bytesPerRow = 8;
int displayRows = height - 2; // Subtract top/bottom borders
i32 width = (2 + 2 + 16 + 7 + 3 + 8 + 4) + addrWidth;
i32 x = 37;
u32 addrWidth = addressWidth(ram.size());
u32 bytesPerRow = 8;
u32 displayRows = height - 2; // Subtract top/bottom borders
u32 width = (2 + 2 + 16 + 7 + 3 + 8 + 4) + addrWidth;
u32 x = 37;
// create box
term.drawBox(y, x, width, height, "RAM");
term.drawBox(i32(y), i32(x), i32(width), i32(height), "RAM");
drawScrollThumb(term, x + width - 2, y + 1, height - 2, scrollPos, ram.size());
// Ensure scrollPos is within bounds and aligned
if (scrollPos < 0) scrollPos = 0;
//if (scrollPos < 0) scrollPos = 0;
if (scrollPos > ram.size()) scrollPos = ram.size();
for (int i = 0; i < displayRows; ++i) {
for (u32 i = 0; i < displayRows; ++i) {
isize currentRowAddr = scrollPos + (i * bytesPerRow);
// address lock
if (currentRowAddr >= ram.size()) {
term.move(y + 1 + i, x + 1);
term.move(i32(y + 1 + i), i32(x + 1));
term.print(std::string(width - 3, ' '));
continue;
}
@@ -266,11 +266,11 @@ namespace spider {
ss << std::setfill('0') << std::uppercase << std::hex;
// address
ssaddr << std::setw(addrWidth) << currentRowAddr << " ";
ssaddr << std::setw(i32(addrWidth)) << currentRowAddr << " ";
// Hex Bytes
std::string asciiPart = "";
for (int j = 0; j < bytesPerRow; ++j) {
for (u32 j = 0; j < bytesPerRow; ++j) {
isize targetAddr = currentRowAddr + j;
if (targetAddr >= ram.size()) {
ss << ""; // Padding for end of memory
@@ -279,12 +279,12 @@ namespace spider {
}
u8 byte = ram[targetAddr];
ss << std::setfill('0') << std::setw(2) << std::hex << (u32)byte << " ";
asciiPart += (std::isprint(byte) ? (char)byte : '.');
ss << std::setfill('0') << std::setw(2) << std::hex << u32(byte) << " ";
asciiPart += (std::isprint(byte) ? char(byte) : '.');
}
// --- Combine and Print ---
term.move(y + 1 + i, x + 2); // Move inside the box
term.move(i32(y + 1 + i), i32(x + 2)); // Move inside the box
term.style(Terminal::FG_B_CYAN).print(ssaddr.str()); // Hex part in Cyan
term.style(Terminal::FG_WHITE).print(ss.str());
term.style(Terminal::FG_B_YELLOW).print(" | ");
@@ -351,7 +351,7 @@ namespace spider {
drawTime(t);
drawHead(t);
drawCPUTempl(t, runtime.cpu);
drawCPUTempl(t);
// delay for time
auto last_exec = std::chrono::steady_clock::now();

View File

@@ -0,0 +1,103 @@
#include "Matrix.hpp"
#include <immintrin.h>
#include <type_traits>
#include <algorithm>
#include <cstring>
namespace spider {
template<typename T>
void matrix_fill(T diag, Matrix<T> mat) {
for (isize i = 0; i < mat.rows; i++) {
for (isize j = 0; j < mat.cols; j++) {
m.data[i + j * mat.rows] = i == j ? diag : T(0);
}
}
}
/**
 * Computes the matrix product mr = m1 * m2.
 *
 * Elements are addressed as data[row + column * rows]. For every output
 * column j and inner index n, column n of m1 scaled by m2(n, j) is
 * accumulated into column j of mr. float and double use SIMD for the bulk
 * of each column when AVX or SSE2 is available; a scalar loop finishes the
 * remaining rows and serves every other element type.
 *
 * @param m1 Left operand.
 * @param m2 Right operand; m2.rows must equal m1.cols.
 * @param mr Destination; must have m1.rows rows and m2.cols columns. Its
 *           buffer is zeroed before m1/m2 are read, so it must not share
 *           storage with either operand.
 *
 * If the dimensions do not agree the function returns without writing to mr.
 */
template<typename T>
void matrix_mult(Matrix<T> m1, Matrix<T> m2, Matrix<T> mr) {
    // natural constraints of matrix multiplication
    if (m1.rows != mr.rows) return;
    if (m2.cols != mr.cols) return;
    if (m1.cols != m2.rows) return;

    const isize rows = mr.rows;

    // fill result with zeroes so the loops below can accumulate into it
    std::fill(mr.data, mr.data + rows * mr.cols, T(0));

    for (isize j = 0; j < mr.cols; j++) {
        T* const out = mr.data + j * rows;
        for (isize n = 0; n < m1.cols; n++) {
            const T val_m2 = m2.data[n + j * m2.rows];
            const T* const in = m1.data + n * m1.rows;
            isize i = 0;

            // Loop bounds are written as `i + W <= rows` rather than
            // `i <= rows - W`, which would wrap for an unsigned index type
            // whenever rows < W.
#if defined(__AVX__)
            if constexpr (std::is_same_v<T, float>) {
                const __m256 v_m2 = _mm256_set1_ps(val_m2);
                for (; i + 8 <= rows; i += 8) {
                    const __m256 v_m1 = _mm256_loadu_ps(in + i);
                    __m256 v_mr = _mm256_loadu_ps(out + i);
#if defined(__FMA__)
                    v_mr = _mm256_fmadd_ps(v_m1, v_m2, v_mr);
#else
                    // AVX alone does not provide fused multiply-add
                    v_mr = _mm256_add_ps(v_mr, _mm256_mul_ps(v_m1, v_m2));
#endif
                    _mm256_storeu_ps(out + i, v_mr);
                }
            }
            else if constexpr (std::is_same_v<T, double>) {
                const __m256d v_m2 = _mm256_set1_pd(val_m2);
                for (; i + 4 <= rows; i += 4) {
                    const __m256d v_m1 = _mm256_loadu_pd(in + i);
                    __m256d v_mr = _mm256_loadu_pd(out + i);
#if defined(__FMA__)
                    v_mr = _mm256_fmadd_pd(v_m1, v_m2, v_mr);
#else
                    v_mr = _mm256_add_pd(v_mr, _mm256_mul_pd(v_m1, v_m2));
#endif
                    _mm256_storeu_pd(out + i, v_mr);
                }
            }
#elif defined(__SSE2__)
            if constexpr (std::is_same_v<T, float>) {
                const __m128 v_m2 = _mm_set1_ps(val_m2);
                for (; i + 4 <= rows; i += 4) {
                    const __m128 v_m1 = _mm_loadu_ps(in + i);
                    __m128 v_mr = _mm_loadu_ps(out + i);
                    v_mr = _mm_add_ps(v_mr, _mm_mul_ps(v_m1, v_m2));
                    _mm_storeu_ps(out + i, v_mr);
                }
            }
            else if constexpr (std::is_same_v<T, double>) {
                const __m128d v_m2 = _mm_set1_pd(val_m2);
                for (; i + 2 <= rows; i += 2) {
                    const __m128d v_m1 = _mm_loadu_pd(in + i);
                    __m128d v_mr = _mm_loadu_pd(out + i);
                    v_mr = _mm_add_pd(v_mr, _mm_mul_pd(v_m1, v_m2));
                    _mm_storeu_pd(out + i, v_mr);
                }
            }
#endif

            // Scalar tail: finishes the rows the SIMD loops did not cover and
            // is the complete implementation for non-SIMD builds and types.
            for (; i < rows; i++) {
                out[i] += in[i] * val_m2;
            }
        }
    }
}
}

View File

@@ -0,0 +1,29 @@
#pragma once
#include <spider/runtime/common.hpp>
namespace spider {
/**
 * Plain view over a matrix buffer: a pointer to the elements plus the
 * matrix dimensions. The struct declares no constructor or destructor.
 */
template<typename T>
struct Matrix {
    T* data;     // pointer to the element buffer
    isize rows;  // number of rows
    isize cols;  // number of columns
};
// Non-template overloads declared for f32 and f64 matrices.
// NOTE(review): the Matrix.cpp added alongside this header defines templates
// named matrix_fill(T, Matrix<T>) and matrix_mult(m1, m2, mr). No definitions
// of the overloads below are visible there, and matrix_mul differs from
// matrix_mult in both name and signature. Confirm each overload is defined
// before relying on it.

// Fill a matrix using `diag`.
void matrix_fill(f32 diag, Matrix<f32> mat);
void matrix_fill(f64 diag, Matrix<f64> mat);
// Multiply m1 by m2 and return the resulting matrix by value.
Matrix<f32> matrix_mul(Matrix<f32> m1, Matrix<f32> m2);
Matrix<f64> matrix_mul(Matrix<f64> m1, Matrix<f64> m2);
// Presumably the matrix inverse — TODO confirm once implemented.
Matrix<f32> matrix_inv(Matrix<f32> mat);
Matrix<f64> matrix_inv(Matrix<f64> mat);
// Presumably the determinant — TODO confirm once implemented.
f32 matrix_det(Matrix<f32> mat);
f64 matrix_det(Matrix<f64> mat);
}

View File

@@ -3,20 +3,57 @@
#include <iostream>
namespace spider {
/**
 * Quaternion product of A and B, written out component by component.
 * This is the general-purpose path, for use when no optimized
 * implementation is available.
 */
template<typename T>
inline Quat<T> quat_mul_gnrl(Quat<T> A, Quat<T> B) {
    // Each component keeps the exact term order of the closed-form product.
    const T c0 = B.w * A.w - B.x * A.x - B.y * A.y - B.z * A.z;
    const T c1 = B.w * A.x + B.x * A.w - B.y * A.z + B.z * A.y;
    const T c2 = B.w * A.y + B.x * A.z + B.y * A.w - B.z * A.x;
    const T c3 = B.w * A.z - B.x * A.y + B.y * A.x + B.z * A.w;
    return { c0, c1, c2, c3 };
}
/**
 * Multiplies two quaternions together.
 * Intended to use SIMD instructions when available. No SIMD path is
 * implemented yet, so this forwards to quat_mul_gnrl; previously the body
 * was empty and the function returned nothing.
 *
 * @param A First operand.
 * @param B Second operand.
 * @return The same result as quat_mul_gnrl(A, B).
 */
template<typename T>
inline Quat<T> quat_mul_smart(Quat<T> A, Quat<T> B) {
    // TODO: add SIMD specialisations; until then use the general path.
    return quat_mul_gnrl<T>(A, B);
}
/**
 * Single-precision quaternion multiply.
 * Forwards to the general implementation and returns the product by value.
 */
Quat<f32> quat_mul(Quat<f32> q1, Quat<f32> q2) {
    const Quat<f32> product = quat_mul_gnrl<f32>(q1, q2);
    return product;
}
void quat_mat(Quat<f32> quat, f32* mat) {
// TODO
}
/**
 * Double-precision quaternion multiply.
 * Forwards to the general implementation and returns the product by value.
 */
Quat<f64> quat_mul(Quat<f64> q1, Quat<f64> q2) {
    const Quat<f64> product = quat_mul_gnrl<f64>(q1, q2);
    return product;
}
void quat_mat(Quat<f64> q1, f64* mat) {
// TODO
}
/*
int quatMain() {
Quat<double> q1 = { 1.0f, 0.0f, 0.0f, 0.0f };
Quat<double> q2 = { 0.5f, 0.5f, 0.5f, 0.5f };
Quat<double> result = quat_mul(q1, q2); // Returns the result!
std::cout << "Result: ("
<< result.w << ", "
<< result.x << ", "
<< result.y << ", "
<< result.z << ")" << std::endl;
return 0;
}
*/
}

View File

@@ -10,15 +10,47 @@ namespace spider {
};
/**
* Multiplies two quaternions together.
* Creates a quaternion from Euler Angles.
*/
template<typename T> inline Quat<T> quat_multiply(Quat<T> A, Quat<T> B) {
return {
B.w * A.w - B.x * A.x - B.y * A.y - B.z * A.z,
B.w * A.x + B.x * A.w - B.y * A.z + B.z * A.y,
B.w * A.y + B.x * A.z + B.y * A.w - B.z * A.x,
B.w * A.z - B.x * A.y + B.y * A.x + B.z * A.w
};
}
Quat<f32> quat_make_euler(f32 x, f32 y, f32 z);
/**
* Creates a quaternion from an axis and an angle.
*/
Quat<f32> quat_make_axis_angle(f32 angle, f32 x, f32 y, f32 z);
/**
* Creates a quaternion from Euler Angles.
*/
Quat<f64> quat_make_euler(f64 x, f64 y, f64 z);
/**
* Creates a quaternion from an axis and an angle.
*/
Quat<f64> quat_make_axis_angle(f64 angle, f64 x, f64 y, f64 z);
/**
* Multiplies a quaternion with another quaternion.
* The product is returned by value; q1 and q2 are not modified.
*/
Quat<f32> quat_mul(Quat<f32> q1, Quat<f32> q2);
/**
* Converts a quaternion to a matrix.
*/
void quat_mat(Quat<f32> quat, f32* mat);
/**
* Multiplies a quaternion with another quaternion.
* The product is returned by value; q1 and q2 are not modified.
*/
Quat<f64> quat_mul(Quat<f64> q1, Quat<f64> q2);
/**
* Converts a quaternion to a matrix.
*/
void quat_mat(Quat<f64> q1, f64* mat);
}

View File

@@ -277,14 +277,14 @@ namespace spider {
template<>
inline void loadPartialLE<f32>(f32* n, const u8* bytes, isize length) {
u32 tmp;
loadLE(&tmp, bytes);
loadPartialLE(&tmp, bytes, length);
*n = bit_cast<f32>(tmp);
}
template<>
inline void loadPartialLE<f64>(f64* n, const u8* bytes, isize length) {
u64 tmp;
loadLE(&tmp, bytes);
loadPartialLE(&tmp, bytes, length);
*n = bit_cast<f64>(tmp);
}

View File

@@ -156,8 +156,8 @@ namespace spider {
if (s.length() >= isize(width)) {
std::cout << s;
} else {
i32 total_padding = width - s.length();
i32 left_padding = total_padding / 2;
isize total_padding = isize(width) - s.length();
isize left_padding = total_padding / 2;
std::cout << std::string(left_padding, ' ');
std::cout << s;
std::cout << std::string(total_padding - left_padding, ' ');