Merge branch 'main' of https://git.sintekanalytics.com/SpiderLang/spider-runtime

I'm still working on this
2026-03-29 07:50:14 -06:00 · 2026-03-29 07:50:11 -06:00
9 changed files with 255 additions and 47 deletions
@@ -15,7 +15,7 @@ OBJEXT      := o
 #Flags, Libraries and Includes
 ROOT        := ./
 CFLAGS      := -std=c++20 -O2 \
-    		   -Wall -Wextra \
+    		   -Wall -Werror -Wextra \
    		   -Wshadow -Wnon-virtual-dtor -Wold-style-cast -Wcast-align \
    		   -Wunused -Woverloaded-virtual -Wconversion \
    		   -Wsign-conversion -Wnull-dereference -Wdouble-promotion \
@@ -67,9 +67,10 @@ namespace spider {

    void CPU::fetchInstr() {
        u16 i = _reel->readU16(RI);
-        _opcode = (i >> 7) & 0x1FF;
-        _addrm = (i >> 2) & 0x1F;
-        _size = i & 0x3;
+        const u16 oc = (i >> 7);
+        _opcode = oc & 0x1FF; // GCC WHY!
+        _addrm = static_cast<u8>((i >> 2) & 0x1F);
+        _size = static_cast<u8>(i & 0x3);
        RI += 2;
    }

@@ -90,7 +91,7 @@ namespace spider {
        (this->*(CPU::addrModes[_addrm]))();

        // modify the _addrm register
-        _addrm >>= 3;
+        _addrm = static_cast<u8>((_addrm >> 3) & 0x1F);
        _addrm++;
    }

@@ -138,7 +139,13 @@ namespace spider {
    void CPU::reg() { // NOT FINISHED
        // Two consecutive registers can be declared
        // Shift if the top part will become .reg too
-        u8 sh = (_addrm & 0b11000 == 0b11000) * 4;
+        u8 sh = ((_addrm & 0b11000) == 0b11000) * 4;
+        u8 use = 1 - (sh >> 2); // (sh / 4)
+
+        // get byte
+        u8 reg = (_reel->readU8(RI) >> sh) & 0xF;
+        _alu = &GPR[reg];
+        RI += use;

        // store no-op
        _post = &CPU::imp;
@@ -39,7 +39,7 @@ namespace spider {
            ;
    }

-    void drawCPUTempl(Terminal& t, CPU& cpu) {
+    void drawCPUTempl(Terminal& t) {
        i32 r = 8, c = 1;
        i32 w = 35, h = 31;
        t.drawBox(r, c, w, h, "CPU");
@@ -171,10 +171,10 @@ namespace spider {
        t.flush();
    }

-    i32 addressWidth(isize ramSize) {
+    u32 addressWidth(isize ramSize) {
        if (ramSize == 0) return 1;
        isize maxAddr = ramSize - 1;
-        i32 digits = 0;
+        u32 digits = 0;
        // Shift by increments of 4 (one hex nibble)
        // We use a do-while to ensure at least 1 digit is returned for small RAMs
        do {
@@ -193,13 +193,13 @@ namespace spider {
     * @param progress The current progress
     * @param total The total
     */
-    void drawScrollThumb(Terminal& term, i32 x, i32 y, i32 trackHeight, isize progress, isize total) {
+    void drawScrollThumb(Terminal& term, u32 x, u32 y, u32 trackHeight, isize progress, isize total) {
        if (total == 0 || trackHeight <= 0) return;

        // 1. Draw the background track (Light Shade: ░)
        term.style(Terminal::FG_B_BLACK); // Dim the track
-        for (int i = 0; i < trackHeight; ++i) {
-            term.move(y + i, x).print("░");
+        for (u32 i = 0; i < trackHeight; ++i) {
+            term.move(i32(y + i), i32(x)).print("░");
        }

        // 2. Calculate Thumb Position
@@ -210,10 +210,10 @@ namespace spider {
        f64 ratio = f64(progress) / f64(total);

        // Map to track coordinates
-        i32 thumbOffset = i32(ratio * (trackHeight - 1));
+        u32 thumbOffset = u32(ratio * (trackHeight - 1));

        // 3. Draw the Thumb (Full Block: █)
-        term.move(y + thumbOffset, x);
+        term.move(i32(y + thumbOffset), i32(x));
        term.style(Terminal::FG_WHITE).print("█");
        term.style(Terminal::RESET);
    }
@@ -230,30 +230,30 @@ namespace spider {
     */
    void drawRAM(Terminal& term, RAM& ram, u64 scrollPos) {
        // 1. Draw the container box
-        i32 y = 3;
-        i32 height = 36;
+        u32 y = 3;
+        u32 height = 36;

        // 2. Configuration for the hex layout
-        int addrWidth = addressWidth(ram.size());
-        int bytesPerRow = 8;
-        int displayRows = height - 2; // Subtract top/bottom borders
-        i32 width = (2 + 2 + 16 + 7 + 3 + 8 + 4) + addrWidth;
-        i32 x = 37;
+        u32 addrWidth = addressWidth(ram.size());
+        u32 bytesPerRow = 8;
+        u32 displayRows = height - 2; // Subtract top/bottom borders
+        u32 width = (2 + 2 + 16 + 7 + 3 + 8 + 4) + addrWidth;
+        u32 x = 37;

        // create box
-        term.drawBox(y, x, width, height, "RAM");
+        term.drawBox(i32(y), i32(x), i32(width), i32(height), "RAM");
        drawScrollThumb(term, x + width - 2, y + 1, height - 2, scrollPos, ram.size());

        // Ensure scrollPos is within bounds and aligned
-        if (scrollPos < 0) scrollPos = 0;
+        //if (scrollPos < 0) scrollPos = 0;
        if (scrollPos > ram.size()) scrollPos = ram.size();

-        for (int i = 0; i < displayRows; ++i) {
+        for (u32 i = 0; i < displayRows; ++i) {
            isize currentRowAddr = scrollPos + (i * bytesPerRow);

            // address lock
            if (currentRowAddr >= ram.size()) {
-                term.move(y + 1 + i, x + 1);
+                term.move(i32(y + 1 + i), i32(x + 1));
                term.print(std::string(width - 3, ' '));
                continue;
            }
@@ -266,11 +266,11 @@ namespace spider {
            ss << std::setfill('0') << std::uppercase << std::hex;

            // address
-            ssaddr << std::setw(addrWidth) << currentRowAddr << "  ";
+            ssaddr << std::setw(i32(addrWidth)) << currentRowAddr << "  ";

            // Hex Bytes
            std::string asciiPart = "";
-            for (int j = 0; j < bytesPerRow; ++j) {
+            for (u32 j = 0; j < bytesPerRow; ++j) {
                isize targetAddr = currentRowAddr + j;
                if (targetAddr >= ram.size()) {
                    ss << ""; // Padding for end of memory
@@ -279,12 +279,12 @@ namespace spider {
                }

                u8 byte = ram[targetAddr];
-                ss << std::setfill('0') << std::setw(2) << std::hex << (u32)byte << " ";
-                asciiPart += (std::isprint(byte) ? (char)byte : '.');
+                ss << std::setfill('0') << std::setw(2) << std::hex << u32(byte) << " ";
+                asciiPart += (std::isprint(byte) ? char(byte) : '.');
            }

            // --- Combine and Print ---
-            term.move(y + 1 + i, x + 2); // Move inside the box
+            term.move(i32(y + 1 + i), i32(x + 2)); // Move inside the box
            term.style(Terminal::FG_B_CYAN).print(ssaddr.str()); // Hex part in Cyan
            term.style(Terminal::FG_WHITE).print(ss.str());
            term.style(Terminal::FG_B_YELLOW).print(" | ");
@@ -351,7 +351,7 @@ namespace spider {

        drawTime(t);
        drawHead(t);
-        drawCPUTempl(t, runtime.cpu);
+        drawCPUTempl(t);

        // delay for time
        auto last_exec = std::chrono::steady_clock::now();
@@ -0,0 +1,103 @@
+#include "Matrix.hpp"
+
+#include <immintrin.h>
+#include <type_traits>
+#include <algorithm>
+#include <cstring>
+
+namespace spider {
+
+    /**
+     * Writes a diagonal pattern into a matrix.
+     * Every element on the main diagonal is set to diag and every
+     * other element is set to zero. Elements are addressed
+     * column-major, i.e. element (i, j) is data[i + j * rows].
+     *
+     * NOTE(review): Matrix.hpp declares non-template matrix_fill
+     * overloads for f32 and f64; this template does not define
+     * them by itself - confirm how they are meant to be provided.
+     *
+     * @param diag The value stored on the main diagonal
+     * @param mat The matrix to overwrite
+     */
+    template<typename T>
+    void matrix_fill(T diag, Matrix<T> mat) {
+        // walk column by column so the writes are contiguous
+        for (isize j = 0; j < mat.cols; j++) {
+            for (isize i = 0; i < mat.rows; i++) {
+                mat.data[i + j * mat.rows] = (i == j) ? diag : T(0);
+            }
+        }
+    }
+
+    /**
+     * Multiplies two matrices: mr = m1 * m2.
+     * All three matrices are addressed column-major, i.e. element
+     * (i, j) is data[i + j * rows].
+     * Returns without touching mr when the dimensions do not agree.
+     * mr is zeroed before accumulating, so its storage must not
+     * overlap with m1 or m2.
+     *
+     * @param m1 The left operand
+     * @param m2 The right operand
+     * @param mr The result, sized m1.rows x m2.cols; overwritten
+     */
+    template<typename T>
+    void matrix_mult(Matrix<T> m1, Matrix<T> m2, Matrix<T> mr) {
+        // natural constraints of matrix multiplication
+        if (m1.rows != mr.rows) return;
+        if (m2.cols != mr.cols) return;
+        if (m1.cols != m2.rows) return;
+
+        // fill result with zeroes
+        std::fill(mr.data, mr.data + mr.rows * mr.cols, T(0));
+
+        // For every result column j, accumulate each column n of m1
+        // scaled by the matching element of m2 (an axpy per (j, n)).
+        for (isize j = 0; j < mr.cols; j++) {
+            const isize off_mr = j * mr.rows;
+            for (isize n = 0; n < m1.cols; n++) {
+                const isize off_m1 = n * m1.rows;
+                const T val_m2 = m2.data[n + j * m2.rows];
+                isize i = 0;
+
+                // The vector loops test "i + W <= rows" rather than
+                // "i <= rows - W": the latter wraps around for short
+                // columns when isize is an unsigned type.
+#if defined(__AVX__)
+                if constexpr (std::is_same_v<T, float>) {
+                    const __m256 v_m2 = _mm256_set1_ps(val_m2);
+                    for (; i + 8 <= mr.rows; i += 8) {
+                        const __m256 v_m1 = _mm256_loadu_ps(&m1.data[off_m1 + i]);
+                        __m256 v_mr = _mm256_loadu_ps(&mr.data[off_mr + i]);
+#if defined(__FMA__)
+                        v_mr = _mm256_fmadd_ps(v_m1, v_m2, v_mr);
+#else
+                        // AVX alone does not provide fused multiply-add
+                        v_mr = _mm256_add_ps(v_mr, _mm256_mul_ps(v_m1, v_m2));
+#endif
+                        _mm256_storeu_ps(&mr.data[off_mr + i], v_mr);
+                    }
+                }
+                else if constexpr (std::is_same_v<T, double>) {
+                    const __m256d v_m2 = _mm256_set1_pd(val_m2);
+                    for (; i + 4 <= mr.rows; i += 4) {
+                        const __m256d v_m1 = _mm256_loadu_pd(&m1.data[off_m1 + i]);
+                        __m256d v_mr = _mm256_loadu_pd(&mr.data[off_mr + i]);
+#if defined(__FMA__)
+                        v_mr = _mm256_fmadd_pd(v_m1, v_m2, v_mr);
+#else
+                        // AVX alone does not provide fused multiply-add
+                        v_mr = _mm256_add_pd(v_mr, _mm256_mul_pd(v_m1, v_m2));
+#endif
+                        _mm256_storeu_pd(&mr.data[off_mr + i], v_mr);
+                    }
+                }
+#elif defined(__SSE2__)
+                if constexpr (std::is_same_v<T, float>) {
+                    const __m128 v_m2 = _mm_set1_ps(val_m2);
+                    for (; i + 4 <= mr.rows; i += 4) {
+                        const __m128 v_m1 = _mm_loadu_ps(&m1.data[off_m1 + i]);
+                        __m128 v_mr = _mm_loadu_ps(&mr.data[off_mr + i]);
+                        v_mr = _mm_add_ps(v_mr, _mm_mul_ps(v_m1, v_m2));
+                        _mm_storeu_ps(&mr.data[off_mr + i], v_mr);
+                    }
+                }
+                else if constexpr (std::is_same_v<T, double>) {
+                    const __m128d v_m2 = _mm_set1_pd(val_m2);
+                    for (; i + 2 <= mr.rows; i += 2) {
+                        const __m128d v_m1 = _mm_loadu_pd(&m1.data[off_m1 + i]);
+                        __m128d v_mr = _mm_loadu_pd(&mr.data[off_mr + i]);
+                        v_mr = _mm_add_pd(v_mr, _mm_mul_pd(v_m1, v_m2));
+                        _mm_storeu_pd(&mr.data[off_mr + i], v_mr);
+                    }
+                }
+#endif
+
+                // Scalar loop: finishes the rows left over by the
+                // vector loops and is the whole computation for
+                // non-SIMD builds or unsupported element types.
+                for (; i < mr.rows; i++) {
+                    mr.data[off_mr + i] += m1.data[off_m1 + i] * val_m2;
+                }
+            }
+        }
+    }
+
+}
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <spider/runtime/common.hpp>
+
+namespace spider {
+
+    /**
+     * Plain matrix descriptor: a pointer to the element storage
+     * together with its dimensions. The struct declares no
+     * constructor or destructor, so it never allocates or frees
+     * data by itself.
+     * The routines in Matrix.cpp address element (i, j) as
+     * data[i + j * rows].
+     */
+    template<typename T>
+    struct Matrix {
+        T* data;     // element storage
+        isize rows;  // number of rows
+        isize cols;  // number of columns
+    };
+
+    /**
+     * Fills a single-precision matrix from a diagonal value.
+     * NOTE(review): the template of the same name in Matrix.cpp
+     * writes diag on the main diagonal and zero elsewhere;
+     * presumably this overload does the same - confirm.
+     */
+    void matrix_fill(f32 diag, Matrix<f32> mat);
+
+    /**
+     * Double-precision counterpart of matrix_fill.
+     */
+    void matrix_fill(f64 diag, Matrix<f64> mat);
+
+    /**
+     * Presumably returns the product m1 * m2.
+     * NOTE(review): Matrix.cpp defines matrix_mult(m1, m2, mr) with
+     * a caller-provided result instead; no definition with this
+     * signature is visible - confirm, including who owns the
+     * returned data.
+     */
+    Matrix<f32> matrix_mul(Matrix<f32> m1, Matrix<f32> m2);
+
+    /**
+     * Double-precision counterpart of matrix_mul.
+     */
+    Matrix<f64> matrix_mul(Matrix<f64> m1, Matrix<f64> m2);
+
+    /**
+     * Presumably returns the inverse of mat.
+     * No definition is visible yet - TODO confirm.
+     */
+    Matrix<f32> matrix_inv(Matrix<f32> mat);
+
+    /**
+     * Double-precision counterpart of matrix_inv.
+     */
+    Matrix<f64> matrix_inv(Matrix<f64> mat);
+
+    /**
+     * Presumably returns the determinant of mat.
+     * No definition is visible yet - TODO confirm.
+     */
+    f32 matrix_det(Matrix<f32> mat);
+
+    /**
+     * Double-precision counterpart of matrix_det.
+     */
+    f64 matrix_det(Matrix<f64> mat);
+
+}
@@ -3,20 +3,57 @@
 #include <iostream>

 namespace spider {
+    
+    /**
+     * General-purpose quaternion product.
+     * Computes the four components with plain scalar arithmetic;
+     * this is the path to use when no optimized variant applies.
+     * The result is brace-initialized in the order the components
+     * are computed below.
+     */
+    template<typename T>
+    inline Quat<T> quat_mul_gnrl(Quat<T> A, Quat<T> B) {
+        const T c0 = B.w * A.w - B.x * A.x - B.y * A.y - B.z * A.z;
+        const T c1 = B.w * A.x + B.x * A.w - B.y * A.z + B.z * A.y;
+        const T c2 = B.w * A.y + B.x * A.z + B.y * A.w - B.z * A.x;
+        const T c3 = B.w * A.z - B.x * A.y + B.y * A.x + B.z * A.w;
+        return { c0, c1, c2, c3 };
+    }

+    /**
+     * Multiplies two quaternions together.
+     * Intended to use SIMD instructions when available. No SIMD
+     * path exists yet, so the call is forwarded to the general
+     * case rather than falling off the end of a non-void function.
+     */
+    template<typename T>
+    inline Quat<T> quat_mul_smart(Quat<T> A, Quat<T> B) {
+        // TODO: add SIMD paths
+        return quat_mul_gnrl<T>(A, B);
+    }
+
+    /**
+     * Single-precision quaternion product, computed by the
+     * general-case routine.
+     */
+    Quat<f32> quat_mul(Quat<f32> q1, Quat<f32> q2) {
+        const Quat<f32> product = quat_mul_gnrl(q1, q2);
+        return product;
+    }
+
+    /**
+     * Converts a single-precision quaternion to a matrix.
+     * Not implemented yet: mat is left untouched. The parameters
+     * are marked [[maybe_unused]] so the stub still builds with
+     * -Wextra -Werror.
+     */
+    void quat_mat([[maybe_unused]] Quat<f32> quat, [[maybe_unused]] f32* mat) {
+        // TODO
+    }
+
+    /**
+     * Double-precision quaternion product, computed by the
+     * general-case routine.
+     */
+    Quat<f64> quat_mul(Quat<f64> q1, Quat<f64> q2) {
+        const Quat<f64> product = quat_mul_gnrl(q1, q2);
+        return product;
+    }
+
+    /**
+     * Converts a double-precision quaternion to a matrix.
+     * Not implemented yet: mat is left untouched. The parameters
+     * are marked [[maybe_unused]] so the stub still builds with
+     * -Wextra -Werror.
+     */
+    void quat_mat([[maybe_unused]] Quat<f64> q1, [[maybe_unused]] f64* mat) {
+        // TODO
+    }
+
+    /*
    int quatMain() {
        Quat<double> q1 = { 1.0f, 0.0f, 0.0f, 0.0f };
        Quat<double> q2 = { 0.5f, 0.5f, 0.5f, 0.5f };
-
        Quat<double> result = quat_multiply(q1, q2);  // Returns the result!
-
        std::cout << "Result: ("
            << result.w << ", "
            << result.x << ", "
            << result.y << ", "
            << result.z << ")" << std::endl;
-
        return 0;
    }
+    */

 }
@@ -10,15 +10,47 @@ namespace spider {
    };

    /**
-     * Multiplies two quaternions together.
+     * Creates a quaternion from Euler Angles.
     */
-    template<typename T> inline Quat<T> quat_multiply(Quat<T> A, Quat<T> B) {
-        return {
-            B.w * A.w - B.x * A.x - B.y * A.y - B.z * A.z,
-            B.w * A.x + B.x * A.w - B.y * A.z + B.z * A.y,
-            B.w * A.y + B.x * A.z + B.y * A.w - B.z * A.x,
-            B.w * A.z - B.x * A.y + B.y * A.x + B.z * A.w
-        };
-    }
+    Quat<f32> quat_make_euler(f32 x, f32 y, f32 z);
+
+    /**
+     * Creates a quaternion from an axis and an angle.
+     */
+    Quat<f32> quat_make_axis_angle(f32 angle, f32 x, f32 y, f32 z);
+
+    /**
+     * Creates a quaternion from Euler Angles.
+     */
+    Quat<f64> quat_make_euler(f64 x, f64 y, f64 z);
+
+    /**
+     * Creates a quaternion from an axis and an angle.
+     */
+    Quat<f64> quat_make_axis_angle(f64 angle, f64 x, f64 y, f64 z);
+
+    /**
+     * Multiplies a quaternion with another quaternion.
+     * The product is returned by value.
+     */
+    Quat<f32> quat_mul(Quat<f32> q1, Quat<f32> q2);
+
+    /**
+     * Converts a quaternion to a matrix.
+     */
+    void quat_mat(Quat<f32> quat, f32* mat);
+
+    /**
+     * Multiplies a quaternion with another quaternion.
+     * The product is returned by value.
+     */
+    Quat<f64> quat_mul(Quat<f64> q1, Quat<f64> q2);
+
+    /**
+     * Converts a quaternion to a matrix.
+     */
+    void quat_mat(Quat<f64> q1, f64* mat);

 }
@@ -277,14 +277,14 @@ namespace spider {
    template<>
    inline void loadPartialLE<f32>(f32* n, const u8* bytes, isize length) {
        u32 tmp;
-        loadLE(&tmp, bytes);
+        loadPartialLE(&tmp, bytes, length);
        *n = bit_cast<f32>(tmp);
    }

    template<>
    inline void loadPartialLE<f64>(f64* n, const u8* bytes, isize length) {
        u64 tmp;
-        loadLE(&tmp, bytes);
+        loadPartialLE(&tmp, bytes, length);
        *n = bit_cast<f64>(tmp);
    }

@@ -156,8 +156,8 @@ namespace spider {
            if (s.length() >= isize(width)) {
                std::cout << s;
            } else {
-                i32 total_padding = width - s.length();
-                i32 left_padding = total_padding / 2;
+                isize total_padding = isize(width) - s.length();
+                isize left_padding = total_padding / 2;
                std::cout << std::string(left_padding, ' ');
                std::cout << s;
                std::cout << std::string(total_padding - left_padding, ' ');
Author	SHA1	Message	Date
Kittycannon	15bf03097f	Merge branch 'main' of https://git.sintekanalytics.com/SpiderLang/spider-runtime	2026-03-29 07:50:14 -06:00
Kittycannon	43f5d26b3d	I'm still working on this	2026-03-29 07:50:11 -06:00