beginning of compiler

This commit is contained in:
2026-06-20 11:53:08 -06:00
parent 5ddecb0c38
commit 9176c4882f
20 changed files with 1784 additions and 0 deletions
View File
+10
View File
@@ -0,0 +1,10 @@
#pragma once
#include <spider/compiler/common.hpp>
namespace spider {
    // Forward declarations of the token classes, so that code which only
    // refers to them by pointer or reference can avoid the full definitions.
    class Token;
    class RootToken;
}
@@ -0,0 +1,35 @@
#include "Assembler.hpp"
#include <system_error>
namespace spider {
// Nothing to set up: both containers start out empty.
Assembler::Assembler() = default;
// Members clean up after themselves; no extra teardown is needed.
Assembler::~Assembler() = default;
/**
 * Loads the file at `path`, pushes a new Level for it and parses that level.
 *
 * @param path File to load; it is resolved to a canonical absolute path.
 * @return Error::SUCCESS once the level has been parsed,
 *         Error::FILE_NOT_FOUND when the path cannot be resolved,
 *         Error::FILE_RECURSIVE_LOAD when the file is already being loaded
 *         further up the load stack.
 * @throws std::runtime_error (from FileTextReader) when the file exists but
 *         cannot be opened.
 */
Assembler::Error Assembler::loadFile(const fs::path& path) {
    // The throwing overload of fs::canonical raises filesystem_error for a
    // missing path, which made the FILE_NOT_FOUND return unreachable. The
    // error_code overload reports the failure instead.
    std::error_code ec;
    const fs::path abs_path = fs::canonical(path, ec);
    if (ec) return Error::FILE_NOT_FOUND;

    // A file that is still being loaded must not be loaded again.
    if (fstack.contains(abs_path)) return Error::FILE_RECURSIVE_LOAD;

    // Removes the entry from the load stack on every exit path, including
    // exceptions thrown while opening or parsing the file.
    struct StackGuard {
        set<fs::path>& stack;
        set<fs::path>::iterator entry;
        ~StackGuard() { stack.erase(entry); }
    } guard{fstack, fstack.insert(abs_path).first};

    // TextReader is abstract, so the concrete reader is created and then
    // converted to the uptr<TextReader> held by the level.
    // NOTE(review): the level stays in `levels` after parsing, as before --
    // confirm whether it should be popped once parsing is done.
    levels.emplace_back(Level {
        .reader = std::make_unique<FileTextReader>(abs_path.string()),
        .source = abs_path.string(),
    });
    parseCurrentLevel();

    return Error::SUCCESS;
}
void Assembler::parseCurrentLevel() {
auto& lvl = levels.back();
}
}
@@ -0,0 +1,52 @@
#pragma once
#include <spider/compiler/common.hpp>
#include <spider/compiler/text/TextReader.hpp>
#include <spider/compiler/tokens/RootToken.hpp>
namespace spider {
/**
 * The spider assembler, which turns assembly text into bytecode.
 */
class Assembler {
public:
    /** Outcome codes reported by the assembler. */
    enum class Error {
        SUCCESS,
        FILE_NOT_FOUND,
        FILE_RECURSIVE_LOAD,
    };

    /** One loaded source: its reader, its root token and where it came from. */
    struct Level {
        uptr<TextReader> reader;
        RootToken root;
        std::string source;
    };

    Assembler();
    ~Assembler();

    /**
     * Attempts to load a file; fails if it doesn't exist.
     *
     * @param path File to load.
     * @return An Error code describing the outcome.
     */
    Error loadFile(const fs::path& path);

    /** Paths of the files that are currently being loaded. */
    set<fs::path> fstack;
    /** Levels pushed by loadFile, most recent last. */
    deque<Level> levels;

private:
    /** Parses the level at the back of `levels`. */
    void parseCurrentLevel();
};
}
@@ -0,0 +1,11 @@
#pragma once
namespace spider {
/**
 * The disassembler, meant to convert bytecode back into readable text.
 * No members are declared yet.
 */
class Disassembler {
};
}
+55
View File
@@ -0,0 +1,55 @@
#pragma once
#include <cstdint>
#include <vector>
#include <deque>
#include <map>
#include <optional>
#include <string>
#include <memory>
#include <filesystem>
#include <set>
namespace spider {
// Absolute Types: short names for the fixed-width integer and floating
// point types.
using u8 = std::uint8_t;
using u16 = std::uint16_t;
using u32 = std::uint32_t;
using u64 = std::uint64_t;
using i8 = std::int8_t;
using i16 = std::int16_t;
using i32 = std::int32_t;
using i64 = std::int64_t;
using f32 = float; // TODO: SPIDER_EMULATE_FLOAT will control this
using f64 = double;
// TODO: Check if we're on C++23, there is already stdfloat
// Fail the build on platforms where float/double do not have the expected
// sizes.
static_assert(sizeof(f32) == 4, "The f32 type must be exactly 4 bytes.");
static_assert(sizeof(f64) == 8, "The f64 type must be exactly 8 bytes.");
// Utility types
// NOTE: despite the "i" prefix, isize aliases std::size_t and is unsigned.
using isize = std::size_t;
// Utility imports: standard containers usable without the std:: prefix
// inside namespace spider.
using std::vector;
using std::deque;
using std::map;
using std::optional;
using std::set;
// Smart pointer shorthands: ptr is shared ownership, uptr is unique ownership.
template<typename T> using ptr = std::shared_ptr<T>;
template<typename T> using uptr = std::unique_ptr<T>;
namespace fs = std::filesystem;
struct pos {
isize line;
isize col;
pos(isize line = 1, isize col = 1)
: line(line), col(col) {}
};
}
+104
View File
@@ -0,0 +1,104 @@
#include "TextReader.hpp"
#include <spider/compiler/text/utf8.hpp>
#include <stdexcept>
namespace spider {
// Text Reader //
/**
 * Extracts one raw byte from the underlying stream.
 *
 * @return The byte value, or -1 when the stream reports end of input.
 */
int TextReader::nextByte() {
    using traits = std::istream::traits_type;
    const traits::int_type got = getStream().get();
    return got == traits::eof() ? -1 : got;
}
/**
 * Reads the next UTF-8 encoded code point from the stream and updates the
 * tracked position.
 *
 * @param ch Receives the decoded code point; left untouched on failure.
 * @return true when a code point was decoded; false on end of input, on an
 *         invalid lead byte, on a sequence cut short by end of input, or
 *         when a continuation byte is malformed.
 */
bool TextReader::nextChar(u32& ch) {
    const int lead = nextByte();
    if (lead == -1) return false;

    // The lead byte announces how many bytes the sequence occupies.
    const isize len = utf8::seqlen(u8(lead));
    if (len == 0) return false;

    char arr[4];
    arr[0] = char(lead);
    for (isize i = 1; i < len; ++i) {
        const int next = nextByte();
        if (next == -1) return false;
        // decodeArr expects validated input, so anything that is not a
        // 10xxxxxx continuation byte is rejected here instead of being
        // decoded into a bogus code point.
        if (!utf8::isCont(u8(next))) return false;
        arr[i] = char(next);
    }

    ch = utf8::decodeArr(arr, len);
    advance(ch);
    return true;
}
/**
 * Updates the tracked position after `ch` has been consumed.
 *
 * A carriage return starts a new line and is remembered, so that a line feed
 * directly after it (CRLF) does not count as a second line break. A lone
 * line feed starts a new line. Any other code point advances the column.
 */
void TextReader::advance(u32 ch) {
    switch (ch) {
        case u32('\r'):
            ++at.line;
            at.col = 1;
            lastWasCR = true;
            return;

        case u32('\n'):
            // Second half of a CRLF pair: the CR already moved to a new line.
            if (!lastWasCR) {
                ++at.line;
                at.col = 1;
            }
            lastWasCR = false;
            return;

        default:
            ++at.col;
            lastWasCR = false;
            return;
    }
}
/**
 * Peeks at the stream without consuming anything.
 *
 * @return true when the next read would hit end of input.
 */
bool TextReader::isEOF() {
    const auto upcoming = getStream().peek();
    const auto endMarker = std::istream::traits_type::eof();
    return upcoming == endMarker;
}
/**
 * @return A copy of the position currently tracked by the reader.
 */
pos TextReader::getPosition() const {
    const pos current = at;
    return current;
}
// File Reader //
/**
 * Opens `filename` in binary mode for reading.
 *
 * @param filename Path of the file to open.
 * @throws std::runtime_error when the file cannot be opened.
 */
FileTextReader::FileTextReader(const std::string& filename)
    : fileStream(filename, std::ios::binary) {
    if (fileStream.is_open()) return;
    throw std::runtime_error("Failed to open file: " + filename);
}
/**
 * @return The file stream, seen through the generic std::istream interface.
 */
std::istream& FileTextReader::getStream() {
    std::istream& stream = fileStream;
    return stream;
}
// String Reader //
/**
 * Creates a reader over `initialText`.
 *
 * The text is kept in `buffer`, and the stream is built from that buffer.
 * `buffer` is declared before `stringStream`, so it is initialized first.
 */
StringTextReader::StringTextReader(std::string initialText)
    : buffer(std::move(initialText))
    , stringStream(std::make_unique<std::istringstream>(buffer))
{}
/**
 * @return The string stream, seen through the generic std::istream interface.
 */
std::istream& StringTextReader::getStream() {
    std::istream& stream = *stringStream;
    return stream;
}
/**
 * Replaces the whole text and restarts reading from its beginning.
 *
 * The carriage-return flag is cleared; the tracked position (`at`) is left
 * as it was.
 * NOTE(review): confirm whether the position should be reset here as well.
 */
void StringTextReader::set(const std::string& newText) {
    buffer.assign(newText);
    auto fresh = std::make_unique<std::istringstream>(buffer);
    stringStream = std::move(fresh);
    lastWasCR = false;
}
/**
 * Appends `extraText` to the text while keeping the current read offset, so
 * reading continues where it left off and then runs into the new text.
 *
 * @param extraText Text added to the end of the buffer.
 */
void StringTextReader::append(const std::string& extraText) {
    // Once the stream has hit end of input, its error flags make tellg()
    // return -1, and seeking the rebuilt stream to -1 would leave it failed
    // for good. Clearing the flags first gives the real read offset.
    stringStream->clear();
    const std::streampos consumed = stringStream->tellg();

    buffer += extraText;
    stringStream = std::make_unique<std::istringstream>(buffer);

    // Only restore a usable offset; otherwise read from the start rather
    // than poisoning the fresh stream.
    if (consumed != std::streampos(-1)) {
        stringStream->seekg(consumed);
    }
}
}
+91
View File
@@ -0,0 +1,91 @@
#pragma once
#include <spider/compiler/common.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <memory>
namespace spider {
/**
 * Abstract text reader: hands out code points one at a time from a stream
 * supplied by the subclass, and tracks a position while doing so.
 */
class TextReader {
public:
    TextReader() = default;
    virtual ~TextReader() = default;

    /** Reads the next code point into `ch`; returns false when none was read. */
    bool nextChar(u32& ch);
    /** Reports whether the underlying stream is at end of input. */
    bool isEOF();
    /** Returns the currently tracked position. */
    pos getPosition() const;

protected:
    /** Reads one raw byte from the stream. */
    int nextByte();
    /** Updates the tracked position for a consumed code point. */
    void advance(u32 ch);
    /** Supplies the stream to read from. */
    virtual std::istream& getStream() = 0;

    pos at;
    bool lastWasCR = false;
};
/**
 * Text reader backed by a file stream.
 */
class FileTextReader : public TextReader {
public:
    /** Opens `filename` for reading. */
    explicit FileTextReader(const std::string& filename);

protected:
    std::istream& getStream() override;

private:
    std::ifstream fileStream;
};
/**
 * Text reader backed by an in-memory string whose contents can be replaced
 * or extended.
 */
class StringTextReader : public TextReader {
public:
    explicit StringTextReader(std::string initialText = "");

    /** Replaces the whole text. */
    void set(const std::string& newText);
    /** Adds text to the end of the current text. */
    void append(const std::string& extraText);

protected:
    std::istream& getStream() override;

private:
    // Keep this order: the stream is constructed from the buffer.
    std::string buffer;
    std::unique_ptr<std::istringstream> stringStream;
};
}
+91
View File
@@ -0,0 +1,91 @@
#pragma once
#include <spider/compiler/common.hpp>
#include <cstdint>
#include <cstddef>
#include <string>
namespace spider {
namespace utf8 {
// --------------------- //
// UTF-8 Sequence Length //
// --------------------- //
/**
 * Determines the length of a UTF-8 sequence from its first byte.
 *
 * @param c First byte of the sequence.
 * @return 1 to 4 for a recognized lead byte pattern, 0 otherwise (for
 *         example a continuation byte).
 */
constexpr isize seqlen(u8 c) {
    if (c < 0x80) return 1;           // 0xxxxxxx
    if ((c >> 5) == 0x06) return 2;   // 110xxxxx
    if ((c >> 4) == 0x0E) return 3;   // 1110xxxx
    if ((c >> 3) == 0x1E) return 4;   // 11110xxx
    return 0;
}
/**
 * @return true when `c` has the continuation byte pattern 10xxxxxx.
 */
constexpr bool isCont(u8 c) {
    return (c >> 6) == 0x02;
}
/**
 * Checks that `src` starts with a structurally complete UTF-8 sequence: a
 * recognized lead byte followed by the required continuation bytes.
 *
 * @param src Bytes to inspect.
 * @param len Number of bytes available in `src`.
 * @return The length of the sequence, or 0 when `len` is 0, the lead byte is
 *         not recognized, the sequence does not fit in `len` bytes, or a
 *         continuation byte is malformed.
 */
constexpr isize isValidSeq(const char* src, isize len) {
    if (len == 0) return 0;

    const isize need = seqlen(u8(src[0]));
    if (need == 0 || need > len) return 0;

    isize idx = 1;
    while (idx < need) {
        if (!isCont(u8(src[idx]))) return 0;
        ++idx;
    }
    return need;
}
// ----------------- //
// UTF-8 into UTF-32 //
// ----------------- //
/**
 * Validates and decodes the UTF-8 sequence at the start of `src`.
 *
 * @param src Bytes to decode.
 * @param len Number of bytes available in `src`.
 * @param out Receives the decoded code point; untouched when 0 is returned.
 * @return The number of bytes consumed, or 0 when the sequence is invalid.
 */
inline isize decode(const char* src, isize len, u32& out) {
    const isize count = isValidSeq(src, len);
    if (count == 0) return 0;

    // Payload bits of the lead byte, indexed by sequence length (1 to 4).
    static constexpr u8 leadMask[5] = {
        0x00, // unused
        0x7F, // 0xxxxxxx
        0x1F, // 110xxxxx
        0x0F, // 1110xxxx
        0x07  // 11110xxx
    };

    // Start with the lead byte, then fold in 6 bits per continuation byte.
    out = u8(src[0]) & leadMask[count];
    isize idx = 1;
    while (idx < count) {
        out = (out << 6) | (u8(src[idx]) & 0x3F);
        ++idx;
    }
    return count;
}
/**
 * A simpler variant of decode that performs no validation: the caller must
 * pass an already validated sequence.
 *
 * @param src   Bytes of the sequence.
 * @param chlen Length of the sequence; must be between 1 and 4, since it
 *              indexes the mask table directly.
 * @return The decoded code point.
 */
inline u32 decodeArr(const char* src, isize chlen) {
    // Payload bits of the lead byte, indexed by sequence length (1 to 4).
    static constexpr u8 leadMask[5] = {
        0x00, // unused
        0x7F, // 0xxxxxxx
        0x1F, // 110xxxxx
        0x0F, // 1110xxxx
        0x07  // 11110xxx
    };

    // Start with the lead byte, then fold in 6 bits per continuation byte.
    u32 codepoint = u8(src[0]) & leadMask[chlen];
    isize idx = 1;
    while (idx < chlen) {
        codepoint = (codepoint << 6) | (u8(src[idx]) & 0x3F);
        ++idx;
    }
    return codepoint;
}
}
}
+25
View File
@@ -0,0 +1,25 @@
#pragma once
#include <spider/compiler/common.hpp>
namespace spider {
/**
 * Defines the root of a token.
 */
class RootToken {
public:
    RootToken();
    ~RootToken();

    void token();
};
}
+26
View File
@@ -0,0 +1,26 @@
#include "Token.hpp"
namespace spider {
/**
 * Creates a token with no nested tokens.
 *
 * @param _at   Position associated with the token.
 * @param _type Kind of token.
 * @param _str  Text of the token; taken by value and moved into place, so
 *              the string is not copied a second time.
 */
Token::Token(pos _at, TokenType _type, std::string _str)
    : at(_at), type(_type), str(std::move(_str)) {}
// Member-wise copy of the position, type, text and nested tokens, which is
// exactly what the defaulted copy constructor does.
Token::Token(const Token& tok) = default;
// Member-wise move. The const members (at, type, str) cannot be moved from
// and are copied; only the nested token list is actually moved.
Token::Token(Token&& tok) = default;
/**
 * Adds a copy of `tok` to the end of the nested token list.
 */
void Token::append(const Token& tok) {
    inner.emplace_back(tok);
}
/**
 * @return A copy of the nested token list.
 */
vector<Token> Token::getInner() {
    vector<Token> nested = inner;
    return nested;
}
/**
 * @return The number of nested tokens.
 */
isize Token::innerCount() {
    const isize count = inner.size();
    return count;
}
}
+56
View File
@@ -0,0 +1,56 @@
#pragma once
#include <spider/compiler/common.hpp>
namespace spider {
/**
 * Kinds of token. Only the assembly group is populated so far; the
 * "Classic" and "Script" groups are placeholders.
 */
enum class TokenType {
    // Assembly
    PREPROCESSOR_TAG,
    WHITESPACE,
    NEWLINE,
    INSTRUCTION,
    OPCODE,
    OPERATOR,
    OPERAND,
    REGISTER,
    NUMBER,
    BIN_NUMBER,
    OCT_NUMBER,
    HEX_NUMBER,
    ADDR_NUMBER,
    BRACKET,
    BRACKET_IND,
    BRACKET_PTR,
    BRACKET_IDX,
    COMMA,
    COMMENT,
    SECTION,
    VARIABLE,
    ASSIGNMENT
    // Classic
    // Script
};
/**
 * A general token: a position, a type and a text, plus an ordered list of
 * nested tokens. The position, type and text are fixed at construction.
 */
class Token {
public:
    Token(pos _at, TokenType _type, std::string _str);
    Token(const Token& tok);
    Token(Token&& tok);

    /** Adds a copy of `tok` to the nested tokens. */
    void append(const Token& tok);
    /** Returns a copy of the nested tokens. */
    vector<Token> getInner();
    /** Returns how many nested tokens there are. */
    isize innerCount();

    const pos at;
    const TokenType type;
    const std::string str;

private:
    vector<Token> inner;
};
}