diff --git a/makefile b/makefile index 92cedfa..590408f 100644 --- a/makefile +++ b/makefile @@ -15,7 +15,7 @@ OBJEXT := o #Flags, Libraries and Includes ROOT := ./ CFLAGS := -std=c++20 -O2 \ - -Wall -Wextra \ + -Wall -Werror -Wextra \ -Wshadow -Wnon-virtual-dtor -Wold-style-cast -Wcast-align \ -Wunused -Woverloaded-virtual -Wconversion \ -Wsign-conversion -Wnull-dereference -Wdouble-promotion \ diff --git a/src/spider/runtime/cpu/CPU.cpp b/src/spider/runtime/cpu/CPU.cpp index 4b984d8..1a796b1 100644 --- a/src/spider/runtime/cpu/CPU.cpp +++ b/src/spider/runtime/cpu/CPU.cpp @@ -67,9 +67,10 @@ namespace spider { void CPU::fetchInstr() { u16 i = _reel->readU16(RI); - _opcode = (i >> 7) & 0x1FF; - _addrm = (i >> 2) & 0x1F; - _size = i & 0x3; + const u16 oc = (i >> 7); + _opcode = oc & 0x1FF; // GCC WHY! + _addrm = static_cast((i >> 2) & 0x1F); + _size = static_cast(i & 0x3); RI += 2; } @@ -90,7 +91,7 @@ namespace spider { (this->*(CPU::addrModes[_addrm]))(); // modify the _addrm register - _addrm >>= 3; + _addrm = static_cast((_addrm >> 3) & 0x1F); _addrm++; } @@ -138,7 +139,13 @@ namespace spider { void CPU::reg() { // NOT FINISHED // Two consecutive registers can be declared // Shift if the top part will become .reg too - u8 sh = (_addrm & 0b11000 == 0b11000) * 4; + u8 sh = ((_addrm & 0b11000) == 0b11000) * 4; + u8 use = 1 - (sh >> 2); // (sh / 4) + + // get byte + u8 reg = (_reel->readU8(RI) >> sh) & 0xF; + _alu = &GPR[reg]; + RI += use; // store no-op _post = &CPU::imp; diff --git a/src/spider/runtime/debug/LiveDebug.cpp b/src/spider/runtime/debug/LiveDebug.cpp index 8e80d5c..ddb6192 100644 --- a/src/spider/runtime/debug/LiveDebug.cpp +++ b/src/spider/runtime/debug/LiveDebug.cpp @@ -39,7 +39,7 @@ namespace spider { ; } - void drawCPUTempl(Terminal& t, CPU& cpu) { + void drawCPUTempl(Terminal& t) { i32 r = 8, c = 1; i32 w = 35, h = 31; t.drawBox(r, c, w, h, "CPU"); @@ -171,10 +171,10 @@ namespace spider { t.flush(); } - i32 addressWidth(isize ramSize) { + u32 addressWidth(isize ramSize) { if (ramSize == 0) return 1; isize maxAddr = ramSize - 1; - i32 digits = 0; + u32 digits = 0; // Shift by increments of 4 (one hex nibble) // We use a do-while to ensure at least 1 digit is returned for small RAMs do { @@ -193,13 +193,13 @@ namespace spider { * @param progress The current progress * @param total The total */ - void drawScrollThumb(Terminal& term, i32 x, i32 y, i32 trackHeight, isize progress, isize total) { + void drawScrollThumb(Terminal& term, u32 x, u32 y, u32 trackHeight, isize progress, isize total) { if (total == 0 || trackHeight <= 0) return; // 1. Draw the background track (Light Shade: ░) term.style(Terminal::FG_B_BLACK); // Dim the track - for (int i = 0; i < trackHeight; ++i) { - term.move(y + i, x).print("░"); + for (u32 i = 0; i < trackHeight; ++i) { + term.move(i32(y + i), i32(x)).print("░"); } // 2. Calculate Thumb Position @@ -210,10 +210,10 @@ namespace spider { f64 ratio = f64(progress) / f64(total); // Map to track coordinates - i32 thumbOffset = i32(ratio * (trackHeight - 1)); + u32 thumbOffset = u32(ratio * (trackHeight - 1)); // 3. Draw the Thumb (Full Block: █) - term.move(y + thumbOffset, x); + term.move(i32(y + thumbOffset), i32(x)); term.style(Terminal::FG_WHITE).print("█"); term.style(Terminal::RESET); } @@ -230,30 +230,30 @@ namespace spider { */ void drawRAM(Terminal& term, RAM& ram, u64 scrollPos) { // 1. Draw the container box - i32 y = 3; - i32 height = 36; + u32 y = 3; + u32 height = 36; // 2. Configuration for the hex layout - int addrWidth = addressWidth(ram.size()); - int bytesPerRow = 8; - int displayRows = height - 2; // Subtract top/bottom borders - i32 width = (2 + 2 + 16 + 7 + 3 + 8 + 4) + addrWidth; - i32 x = 37; + u32 addrWidth = addressWidth(ram.size()); + u32 bytesPerRow = 8; + u32 displayRows = height - 2; // Subtract top/bottom borders + u32 width = (2 + 2 + 16 + 7 + 3 + 8 + 4) + addrWidth; + u32 x = 37; // create box - term.drawBox(y, x, width, height, "RAM"); + term.drawBox(i32(y), i32(x), i32(width), i32(height), "RAM"); drawScrollThumb(term, x + width - 2, y + 1, height - 2, scrollPos, ram.size()); // Ensure scrollPos is within bounds and aligned - if (scrollPos < 0) scrollPos = 0; + //if (scrollPos < 0) scrollPos = 0; if (scrollPos > ram.size()) scrollPos = ram.size(); - for (int i = 0; i < displayRows; ++i) { + for (u32 i = 0; i < displayRows; ++i) { isize currentRowAddr = scrollPos + (i * bytesPerRow); // address lock if (currentRowAddr >= ram.size()) { - term.move(y + 1 + i, x + 1); + term.move(i32(y + 1 + i), i32(x + 1)); term.print(std::string(width - 3, ' ')); continue; } @@ -266,11 +266,11 @@ namespace spider { ss << std::setfill('0') << std::uppercase << std::hex; // address - ssaddr << std::setw(addrWidth) << currentRowAddr << " "; + ssaddr << std::setw(i32(addrWidth)) << currentRowAddr << " "; // Hex Bytes std::string asciiPart = ""; - for (int j = 0; j < bytesPerRow; ++j) { + for (u32 j = 0; j < bytesPerRow; ++j) { isize targetAddr = currentRowAddr + j; if (targetAddr >= ram.size()) { ss << ""; // Padding for end of memory @@ -279,12 +279,12 @@ namespace spider { } u8 byte = ram[targetAddr]; - ss << std::setfill('0') << std::setw(2) << std::hex << (u32)byte << " "; - asciiPart += (std::isprint(byte) ? (char)byte : '.'); + ss << std::setfill('0') << std::setw(2) << std::hex << u32(byte) << " "; + asciiPart += (std::isprint(byte) ? char(byte) : '.'); } // --- Combine and Print --- - term.move(y + 1 + i, x + 2); // Move inside the box + term.move(i32(y + 1 + i), i32(x + 2)); // Move inside the box term.style(Terminal::FG_B_CYAN).print(ssaddr.str()); // Hex part in Cyan term.style(Terminal::FG_WHITE).print(ss.str()); term.style(Terminal::FG_B_YELLOW).print(" | "); @@ -351,7 +351,7 @@ namespace spider { drawTime(t); drawHead(t); - drawCPUTempl(t, runtime.cpu); + drawCPUTempl(t); // delay for time auto last_exec = std::chrono::steady_clock::now(); diff --git a/src/spider/runtime/math/Matrix.cpp b/src/spider/runtime/math/Matrix.cpp new file mode 100644 index 0000000..599001e --- /dev/null +++ b/src/spider/runtime/math/Matrix.cpp @@ -0,0 +1,103 @@ +#include "Matrix.hpp" + +#include +#include +#include +#include + +namespace spider { + + template + void matrix_fill(T diag, Matrix mat) { + for (isize i = 0; i < mat.rows; i++) { + for (isize j = 0; j < mat.cols; j++) { + m.data[i + j * mat.rows] = i == j ? diag : T(0); + } + } + } + + template + void matrix_mult(Matrix m1, Matrix m2, Matrix mr) { + // natural constrains of matrix multiplication + if (m1.rows != mr.rows) return; + if (m2.cols != mr.cols) return; + if (m1.cols != m2.rows) return; + + // fill result with zeroes + std::fill(mr.data, mr.data + mr.rows * mr.cols, T(0)); + + // Begin Loop + for (isize j = 0; j < mr.cols; j++) { // P + for (isize n = 0; n < m1.cols; n++) { // N + const T val_m2 = m2.data[n + j * m2.rows] * diag; + isize i = 0; + +#if defined(__AVX__) + if constexpr (std::is_same_v) { + const __m256 v_m2 = _mm256_set1_ps(val_m2); + for (; i <= mr.rows - 8; i += 8) { + __m256 v_m1 = _mm256_loadu_ps(&m1.data[i + n * m1.rows]); + __m256 v_mr = _mm256_loadu_ps(&mr.data[i + j * mr.rows]); + v_mr = _mm256_fmadd_ps(v_m1, v_m2, v_mr); + _mm256_storeu_ps(&mr.data[i + j * mr.rows], v_mr); + } + if (i < mr.rows) { + float buf_m1[8] = { 0 }, buf_mr[8] = { 0 }; + isize rem = mr.rows - i; + std::memcpy(buf_m1, &m1.data[i + n * m1.rows], rem * sizeof(T)); + std::memcpy(buf_mr, &mr.data[i + j * mr.rows], rem * sizeof(T)); + _mm256_storeu_ps(buf_mr, _mm256_fmadd_ps(_mm256_loadu_ps(buf_m1), v_m2, _mm256_loadu_ps(buf_mr))); + std::memcpy(&mr.data[i + j * mr.rows], buf_mr, rem * sizeof(T)); + } + } + else if constexpr (std::is_same_v) { + const __m256d v_m2 = _mm256_set1_pd(val_m2); + for (; i <= mr.rows - 4; i += 4) { + __m256d v_m1 = _mm256_loadu_pd(&m1.data[i + n * m1.rows]); + __m256d v_mr = _mm256_loadu_pd(&mr.data[i + j * mr.rows]); + v_mr = _mm256_fmadd_pd(v_m1, v_m2, v_mr); + _mm256_storeu_pd(&mr.data[i + j * mr.rows], v_mr); + } + if (i < mr.rows) { + double buf_m1[4] = { 0 }, buf_mr[4] = { 0 }; + isize rem = mr.rows - i; + std::memcpy(buf_m1, &m1.data[i + n * m1.rows], rem * sizeof(T)); + std::memcpy(buf_mr, &mr.data[i + j * mr.rows], rem * sizeof(T)); + _mm256_storeu_pd(buf_mr, _mm256_fmadd_pd(_mm256_loadu_pd(buf_m1), v_m2, _mm256_loadu_pd(buf_mr))); + std::memcpy(&mr.data[i + j * mr.rows], buf_mr, rem * sizeof(T)); + } + } + else +#elif defined(__SSE2__) + if constexpr (std::is_same_v) { + const __m128 v_m2 = _mm_set1_ps(val_m2); + for (; i <= mr.rows - 4; i += 4) { + __m128 v_m1 = _mm_loadu_ps(&m1.data[i + n * m1.rows]); + __m128 v_mr = _mm_loadu_ps(&mr.data[i + j * mr.rows]); + v_mr = _mm_add_ps(v_mr, _mm_mul_ps(v_m1, v_m2)); + _mm_storeu_ps(&mr.data[i + j * mr.rows], v_mr); + } + // Tail buffer logic omitted for brevity, same as float AVX but with size 4 + } + else if constexpr (std::is_same_v) { + const __m128d v_m2 = _mm_set1_pd(val_m2); + for (; i <= mr.rows - 2; i += 2) { + __m128d v_m1 = _mm_loadu_pd(&m1.data[i + n * m1.rows]); + __m128d v_mr = _mm_loadu_pd(&mr.data[i + j * mr.rows]); + v_mr = _mm_add_pd(v_mr, _mm_mul_pd(v_m1, v_m2)); + _mm_storeu_pd(&mr.data[i + j * mr.rows], v_mr); + } + } + else +#endif + { + // Fallback for non-SIMD or unsupported types + for (; i < mr.rows; i++) { + mr.data[i + j * mr.rows] += m1.data[i + n * m1.rows] * val_m2; + } + } + } + } + } + +} diff --git a/src/spider/runtime/math/Matrix.hpp b/src/spider/runtime/math/Matrix.hpp new file mode 100644 index 0000000..4b62046 --- /dev/null +++ b/src/spider/runtime/math/Matrix.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include + +namespace spider { + + template + struct Matrix { + T* data; + isize rows, cols; + }; + + void matrix_fill(f32 diag, Matrix mat); + + void matrix_fill(f64 diag, Matrix mat); + + Matrix matrix_mul(Matrix m1, Matrix m2); + + Matrix matrix_mul(Matrix m1, Matrix m2); + + Matrix matrix_inv(Matrix mat); + + Matrix matrix_inv(Matrix mat); + + f32 matrix_det(Matrix mat); + + f64 matrix_det(Matrix mat); + +} diff --git a/src/spider/runtime/math/Quat.cpp b/src/spider/runtime/math/Quat.cpp index ae40493..433bd00 100644 --- a/src/spider/runtime/math/Quat.cpp +++ b/src/spider/runtime/math/Quat.cpp @@ -3,20 +3,57 @@ #include namespace spider { + + /** + * Multiplies two quaternions together. + * General case, use it when no optimizations exist. + */ + template + inline Quat quat_mul_gnrl(Quat A, Quat B) { + return { + B.w * A.w - B.x * A.x - B.y * A.y - B.z * A.z, + B.w * A.x + B.x * A.w - B.y * A.z + B.z * A.y, + B.w * A.y + B.x * A.z + B.y * A.w - B.z * A.x, + B.w * A.z - B.x * A.y + B.y * A.x + B.z * A.w + }; + } + /** + * Multiplies two quaternions together. + * Attempts to use SIMD instructions when available. + */ + template + inline Quat quat_mul_smart(Quat A, Quat B) { + } + + Quat quat_mul(Quat q1, Quat q2) { + return quat_mul_gnrl(q1, q2); + } + + void quat_mat(Quat quat, f32* mat) { + // TODO + } + + Quat quat_mul(Quat q1, Quat q2) { + return quat_mul_gnrl(q1, q2); + } + + void quat_mat(Quat q1, f64* mat) { + // TODO + } + + /* int quatMain() { Quat q1 = { 1.0f, 0.0f, 0.0f, 0.0f }; Quat q2 = { 0.5f, 0.5f, 0.5f, 0.5f }; - Quat result = quat_multiply(q1, q2); // Returns the result! - std::cout << "Result: (" << result.w << ", " << result.x << ", " << result.y << ", " << result.z << ")" << std::endl; - return 0; } + */ } diff --git a/src/spider/runtime/math/Quat.hpp b/src/spider/runtime/math/Quat.hpp index 676778c..8f774db 100644 --- a/src/spider/runtime/math/Quat.hpp +++ b/src/spider/runtime/math/Quat.hpp @@ -10,15 +10,47 @@ namespace spider { }; /** - * Multiplies two quaternions together. + * Creates a quaternion from Euler Angles. */ - template inline Quat quat_multiply(Quat A, Quat B) { - return { - B.w * A.w - B.x * A.x - B.y * A.y - B.z * A.z, - B.w * A.x + B.x * A.w - B.y * A.z + B.z * A.y, - B.w * A.y + B.x * A.z + B.y * A.w - B.z * A.x, - B.w * A.z - B.x * A.y + B.y * A.x + B.z * A.w - }; - } + Quat quat_make_euler(f32 x, f32 y, f32 z); + + /** + * Creates a quaternion from an axis and an angle. + */ + Quat quat_make_axis_angle(f32 angle, f32 x, f32 y, f32 z); + + /** + * Creates a quaternion from Euler Angles. + */ + Quat quat_make_euler(f64 x, f64 y, f64 z); + + /** + * Creates a quaternion from an axis and an angle. + */ + Quat quat_make_axis_angle(f64 angle, f64 x, f64 y, f64 z); + + /** + * Multiples a quaternion with another quaternion. + * The result is output to the qr variable. + * This pointer can be the same as q1. + */ + Quat quat_mul(Quat q1, Quat q2); + + /** + * Converts a quaternion to a matrix. + */ + void quat_mat(Quat quat, f32* mat); + + /** + * Multiples a quaternion with another quaternion. + * The result is output to the qr variable. + * This pointer can be the same as q1. + */ + Quat quat_mul(Quat q1, Quat q2); + + /** + * Converts a quaternion to a matrix. + */ + void quat_mat(Quat q1, f64* mat); } diff --git a/src/spider/runtime/memory/Types.hpp b/src/spider/runtime/memory/Types.hpp index 7d89ae1..bee219f 100644 --- a/src/spider/runtime/memory/Types.hpp +++ b/src/spider/runtime/memory/Types.hpp @@ -277,14 +277,14 @@ namespace spider { template<> inline void loadPartialLE(f32* n, const u8* bytes, isize length) { u32 tmp; - loadLE(&tmp, bytes); + loadPartialLE(&tmp, bytes, length); *n = bit_cast(tmp); } template<> inline void loadPartialLE(f64* n, const u8* bytes, isize length) { u64 tmp; - loadLE(&tmp, bytes); + loadPartialLE(&tmp, bytes, length); *n = bit_cast(tmp); } diff --git a/src/spider/runtime/util/Terminal.hpp b/src/spider/runtime/util/Terminal.hpp index 3f183f1..65fd323 100644 --- a/src/spider/runtime/util/Terminal.hpp +++ b/src/spider/runtime/util/Terminal.hpp @@ -156,8 +156,8 @@ namespace spider { if (s.length() >= isize(width)) { std::cout << s; } else { - i32 total_padding = width - s.length(); - i32 left_padding = total_padding / 2; + isize total_padding = isize(width) - s.length(); + isize left_padding = total_padding / 2; std::cout << std::string(left_padding, ' '); std::cout << s; std::cout << std::string(total_padding - left_padding, ' ');