813 lines
24 KiB
C++
813 lines
24 KiB
C++
#pragma once
|
|
|
|
#include <cctype>
#include <cstddef>
#include <initializer_list>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
/**
 * @brief Recursive-descent recognizer for an assembly-language source text.
 *
 * The parser only validates: it builds no tree and returns no values. Each
 * public `parse_*` method recognizes one grammar rule starting at the current
 * cursor, advances the cursor past the matched text on success, and throws
 * `std::runtime_error` on failure.
 *
 * Disambiguation strategy:
 *  - Character-level rules and simple repetitions use one-character lookahead.
 *  - Optional groups and repetitions of composite items are attempted and, if
 *    they fail, the cursor is rewound to where the attempt started.
 *  - Choices whose alternatives can share a prefix (`literal`, `operand`,
 *    `addr_modes`, `line_content`) try every alternative from the same start
 *    and keep the one that consumed the most input; ties go to the alternative
 *    listed first.
 *
 * Backtracking is implemented by catching the `std::runtime_error` thrown by a
 * failed sub-rule, which keeps the public interface (void methods that throw)
 * unchanged.
 */
class AssemblyParser {
private:
    /// Pointer to a grammar-rule member function (used for alternative lists).
    using Rule = void (AssemblyParser::*)();

    static constexpr const char* kDecDigits = "0123456789";
    static constexpr const char* kHexDigits = "0123456789ABCDEFabcdef";
    static constexpr const char* kOctDigits = "01234567";
    static constexpr const char* kBinDigits = "01";
    static constexpr const char* kSigns = "+-";
    static constexpr const char* kIntSuffixes = "BSIL";
    static constexpr const char* kFloatSuffixes = "FD";
    static constexpr const char* kCastTypes = "BSILFD";

    std::string src;      ///< Complete source text being recognized.
    std::size_t pos = 0;  ///< Cursor into `src`; never exceeds `src.length()`.

    // ---------------------------------------------------------------- cursor

    /// True when the cursor has consumed the whole input.
    bool at_end() const { return pos >= src.length(); }

    /// Current character, or '\0' when the cursor is at end of input.
    char peek() const { return at_end() ? '\0' : src[pos]; }

    /// True when `c` occurs in the NUL-terminated `set` ('\0' never matches).
    static bool in_set(char c, const char* set) {
        for (; *set != '\0'; ++set) {
            if (*set == c) return true;
        }
        return false;
    }

    /// Consumes exactly `expected` or throws.
    void match_char(char expected) {
        if (!at_end() && src[pos] == expected) {
            ++pos;
        } else {
            throw std::runtime_error("Unexpected token matching character");
        }
    }

    /// Consumes the exact text `expected` or throws.
    void match_string(const std::string& expected) {
        // compare() clamps the length, so a short tail simply compares unequal.
        if (src.compare(pos, expected.length(), expected) == 0) {
            pos += expected.length();
        } else {
            throw std::runtime_error("Unexpected token matching string: " + expected);
        }
    }

    /// Consumes one character that belongs to `set` or throws.
    void match_any(const char* set) {
        if (at_end() || !in_set(src[pos], set)) {
            throw std::runtime_error("Unexpected token matching character");
        }
        ++pos;
    }

    /// Consumes one character from `set` if present; reports whether it did.
    bool accept_any(const char* set) {
        if (at_end() || !in_set(src[pos], set)) return false;
        ++pos;
        return true;
    }

    /// Consumes zero or more characters that belong to `set`.
    void skip_any(const char* set) {
        while (!at_end() && in_set(src[pos], set)) ++pos;
    }

    /// Consumes one character when `ok`, otherwise throws naming the predicate.
    void consume_if(bool ok, const char* predicate_name) {
        if (!ok) {
            throw std::runtime_error(std::string("Failed validation for ") + predicate_name);
        }
        ++pos;
    }

    // ------------------------------------------------------------ predicates
    // All predicates inspect the current character only and are false at end
    // of input, so a successful predicate always has a character to consume.

    bool isUTF8Alpha() const {
        // The cast avoids undefined behaviour for bytes >= 0x80 (negative char).
        return !at_end() && std::isalpha(static_cast<unsigned char>(src[pos])) != 0;
    }
    bool isDecDigit() const { return !at_end() && in_set(src[pos], kDecDigits); }
    bool isWhithespaceCharNotCrLf() const { return peek() == ' ' || peek() == '\t'; }
    bool isUTF8CharNotCrLf() const {
        return !at_end() && src[pos] != '\r' && src[pos] != '\n' && src[pos] != '\0';
    }
    bool isUTF8CharLitCont() const {
        return !at_end() && src[pos] != '\'' && src[pos] != '\\';
    }
    bool isUTF8StringLitCont() const {
        return !at_end() && src[pos] != '"' && src[pos] != '\\';
    }

    // ----------------------------------------------------------- backtracking

    /// Runs `fn`; on failure rewinds the cursor and returns false.
    template <typename Fn>
    bool attempt(const Fn& fn) {
        const std::size_t mark = pos;
        try {
            fn();
            return true;
        } catch (const std::runtime_error&) {
            pos = mark;
            return false;
        }
    }

    /// `attempt` for a grammar rule given as a member-function pointer.
    bool try_rule(Rule rule) {
        return attempt([this, rule] { (this->*rule)(); });
    }

    /// Applies `fn` repeatedly until it fails or stops consuming input.
    template <typename Fn>
    void repeat(const Fn& fn) {
        for (;;) {
            const std::size_t before = pos;
            if (!attempt(fn) || pos == before) break;
        }
    }

    /// Tries every rule from the current cursor and keeps the longest match.
    /// Throws when no rule matches; the cursor is then left where it started.
    void longest_of(std::initializer_list<Rule> rules) {
        const std::size_t start = pos;
        std::size_t best = start;
        bool matched = false;
        for (Rule rule : rules) {
            pos = start;
            if (try_rule(rule)) {
                if (!matched || pos > best) best = pos;
                matched = true;
            }
        }
        if (!matched) {
            pos = start;
            throw std::runtime_error("No alternative matched at offset " + std::to_string(start));
        }
        pos = best;
    }

    /// Third alternative of line_content:
    /// [annotation whitespace] [label ws_optional] [instruction]
    void parse_plain_line_content() {
        attempt([this] { parse_annotation(); parse_whitespace(); });
        attempt([this] { parse_label(); parse_ws_optional(); });
        try_rule(&AssemblyParser::parse_instruction);
    }

public:
    /// Stores `input` as the text to recognize; the cursor starts at offset 0.
    AssemblyParser(std::string input) : src(std::move(input)) {}

    /// Recognizes a whole program and prints a success message.
    /// @throws std::runtime_error when input remains after `program` matched.
    void parse() {
        parse_program();
        if (pos < src.length()) throw std::runtime_error("Trailing characters left unparsed.");
        std::cout << "Assembly source compiled cleanly!" << std::endl;
    }

    // ------------------------------------------------------ character classes

    /// letter = one alphabetic character
    void parse_letter() { consume_if(isUTF8Alpha(), "isUTF8Alpha"); }

    /// digit = '0'..'9'
    void parse_digit() { match_any(kDecDigits); }

    /// alpha_num_char = letter | digit
    void parse_alpha_num_char() {
        if (isUTF8Alpha()) {
            ++pos;
        } else {
            parse_digit();
        }
    }

    /// hex_digit = digit | 'A'..'F' | 'a'..'f'
    void parse_hex_digit() { match_any(kHexDigits); }

    /// octal_digit = '0'..'7'
    void parse_octal_digit() { match_any(kOctDigits); }

    /// binary_digit = '0' | '1'
    void parse_binary_digit() { match_any(kBinDigits); }

    // ------------------------------------------------------------- whitespace

    /// ws_char = ' ' | '\t'
    void parse_ws_char() {
        consume_if(isWhithespaceCharNotCrLf(), "isWhithespaceCharNotCrLf");
    }

    /// ws_optional = { ws_char }
    void parse_ws_optional() {
        while (isWhithespaceCharNotCrLf()) ++pos;
    }

    /// whitespace = ws_char { ws_char }
    void parse_whitespace() {
        parse_ws_char();
        parse_ws_optional();
    }

    /// newline = "\r\n" | '\r' | '\n'  (CRLF is consumed as one terminator)
    void parse_newline() {
        if (peek() == '\r') {
            ++pos;
            if (peek() == '\n') ++pos;
        } else {
            match_char('\n');
        }
    }

    // ------------------------------------------- character / string literals

    /// utf8_char = any character except CR, LF and NUL
    void parse_utf8_char() { consume_if(isUTF8CharNotCrLf(), "isUTF8CharNotCrLf"); }

    /// char_escape = '\\' utf8_char
    void parse_char_escape() {
        match_char('\\');
        parse_utf8_char();
    }

    /// char_content = char_escape | any character except '\'' and '\\'
    void parse_char_content() {
        if (peek() == '\\') {
            parse_char_escape();
        } else {
            consume_if(isUTF8CharLitCont(), "isUTF8CharLitCont");
        }
    }

    /// char_lit = '\'' char_content '\''
    void parse_char_lit() {
        match_char('\'');
        parse_char_content();
        match_char('\'');
    }

    /// string_char = char_escape | any character except '"' and '\\'
    void parse_string_char() {
        if (peek() == '\\') {
            parse_char_escape();
        } else {
            consume_if(isUTF8StringLitCont(), "isUTF8StringLitCont");
        }
    }

    /// string_lit = '"' { string_char } '"'
    void parse_string_lit() {
        match_char('"');
        while (!at_end() && peek() != '"') parse_string_char();
        match_char('"');  // throws on an unterminated literal
    }

    // ----------------------------------------------------- lexical elements

    /// identifier = ( letter | '_' ) { alpha_num_char | '_' }
    void parse_identifier() {
        if (peek() == '_') {
            ++pos;
        } else {
            parse_letter();
        }
        while (isUTF8Alpha() || isDecDigit() || peek() == '_') ++pos;
    }

    /// comment = ';' { utf8_char }
    void parse_comment() {
        match_char(';');
        while (isUTF8CharNotCrLf()) ++pos;
    }

    /// sign = '+' | '-'
    void parse_sign() { match_any(kSigns); }

    /// exponent_marker = 'e' | 'E'
    void parse_exponent_marker() { match_any("eE"); }

    /// exponent = exponent_marker [ sign ] digit { digit }
    void parse_exponent() {
        parse_exponent_marker();
        accept_any(kSigns);
        parse_digit();
        skip_any(kDecDigits);
    }

    // ------------------------------------------------------ numeric literals

    /// decimal_lit = [ sign ] digit { digit } [ 'B' | 'S' | 'I' | 'L' ]
    void parse_decimal_lit() {
        accept_any(kSigns);
        parse_digit();
        skip_any(kDecDigits);
        accept_any(kIntSuffixes);
    }

    /// float_lit = [ sign ] ( digits '.' digits [ exponent ]
    ///                      | '.' digits [ exponent ]
    ///                      | digits exponent ) [ 'F' | 'D' ]
    void parse_float_lit() {
        accept_any(kSigns);
        if (isDecDigit()) {
            skip_any(kDecDigits);
            if (peek() == '.') {
                ++pos;
                parse_digit();
                skip_any(kDecDigits);
                try_rule(&AssemblyParser::parse_exponent);
            } else {
                // No fraction: the exponent is what makes this a float.
                parse_exponent();
            }
        } else {
            match_char('.');
            parse_digit();
            skip_any(kDecDigits);
            try_rule(&AssemblyParser::parse_exponent);
        }
        accept_any(kFloatSuffixes);
    }

    /// hex_lit = [ sign ] "0x" hex_digit { hex_digit }
    void parse_hex_lit() {
        accept_any(kSigns);
        match_string("0x");
        parse_hex_digit();
        skip_any(kHexDigits);
    }

    /// octal_lit = [ sign ] "0c" octal_digit { octal_digit }
    void parse_octal_lit() {
        accept_any(kSigns);
        match_string("0c");
        parse_octal_digit();
        skip_any(kOctDigits);
    }

    /// binary_lit = [ sign ] "0b" binary_digit { binary_digit }
    void parse_binary_lit() {
        accept_any(kSigns);
        match_string("0b");
        parse_binary_digit();
        skip_any(kBinDigits);
    }

    /// literal = decimal_lit | float_lit | hex_lit | octal_lit | binary_lit
    ///         | string_lit | char_lit
    /// Longest match, so "1.5" is a float and "0x1F" is hex, not decimal "1"/"0".
    void parse_literal() {
        longest_of({&AssemblyParser::parse_decimal_lit,
                    &AssemblyParser::parse_float_lit,
                    &AssemblyParser::parse_hex_lit,
                    &AssemblyParser::parse_octal_lit,
                    &AssemblyParser::parse_binary_lit,
                    &AssemblyParser::parse_string_lit,
                    &AssemblyParser::parse_char_lit});
    }

    /// literal_cast = ( 'B'|'S'|'I'|'L'|'F'|'D' ) ws_optional
    ///                '(' ws_optional literal ws_optional ')'
    void parse_literal_cast() {
        match_any(kCastTypes);
        parse_ws_optional();
        match_char('(');
        parse_ws_optional();
        parse_literal();
        parse_ws_optional();
        match_char(')');
    }

    /// literal_decl = literal | literal_cast
    void parse_literal_decl() {
        // No literal starts with a cast-type letter, so one character decides.
        if (!at_end() && in_set(src[pos], kCastTypes)) {
            parse_literal_cast();
        } else {
            parse_literal();
        }
    }

    // --------------------------------------------------------------- operands

    /// register = 'R' alpha_num_char alpha_num_char
    void parse_register() {
        match_char('R');
        parse_alpha_num_char();
        parse_alpha_num_char();
    }

    /// addrm_ind = '[' ws_optional literal_decl ws_optional ']'
    void parse_addrm_ind() {
        match_char('[');
        parse_ws_optional();
        parse_literal_decl();
        parse_ws_optional();
        match_char(']');
    }

    /// addrm_ptr = '[' ws_optional register ws_optional ']'
    void parse_addrm_ptr() {
        match_char('[');
        parse_ws_optional();
        parse_register();
        parse_ws_optional();
        match_char(']');
    }

    /// addrm_idx = '[' ws_optional register ws_optional '+' ws_optional
    ///             literal_decl ws_optional ']'
    void parse_addrm_idx() {
        match_char('[');
        parse_ws_optional();
        parse_register();
        parse_ws_optional();
        match_char('+');
        parse_ws_optional();
        parse_literal_decl();
        parse_ws_optional();
        match_char(']');
    }

    /// addrm_sca = '[' ws_optional register ws_optional '+' register
    ///             ws_optional '*' ws_optional literal_decl ws_optional ']'
    /// (as in the original rule, no whitespace is accepted between '+' and
    /// the second register)
    void parse_addrm_sca() {
        match_char('[');
        parse_ws_optional();
        parse_register();
        parse_ws_optional();
        match_char('+');
        parse_register();
        parse_ws_optional();
        match_char('*');
        parse_ws_optional();
        parse_literal_decl();
        parse_ws_optional();
        match_char(']');
    }

    /// addrm_dis = '[' ws_optional register ws_optional '+' register
    ///             ws_optional '*' ws_optional literal_decl ws_optional
    ///             '+' ws_optional literal_decl ws_optional ']'
    void parse_addrm_dis() {
        match_char('[');
        parse_ws_optional();
        parse_register();
        parse_ws_optional();
        match_char('+');
        parse_register();
        parse_ws_optional();
        match_char('*');
        parse_ws_optional();
        parse_literal_decl();
        parse_ws_optional();
        match_char('+');
        parse_ws_optional();
        parse_literal_decl();
        parse_ws_optional();
        match_char(']');
    }

    /// addr_modes = addrm_ind | addrm_ptr | addrm_idx | addrm_sca | addrm_dis
    void parse_addr_modes() {
        longest_of({&AssemblyParser::parse_addrm_ind,
                    &AssemblyParser::parse_addrm_ptr,
                    &AssemblyParser::parse_addrm_idx,
                    &AssemblyParser::parse_addrm_sca,
                    &AssemblyParser::parse_addrm_dis});
    }

    /// operand = register | identifier | literal_decl | addr_modes
    /// Longest match, so "Result" is an identifier rather than register "Res".
    void parse_operand() {
        longest_of({&AssemblyParser::parse_register,
                    &AssemblyParser::parse_identifier,
                    &AssemblyParser::parse_literal_decl,
                    &AssemblyParser::parse_addr_modes});
    }

    // ----------------------------------------------------------- instructions

    /// opcode = letter { alpha_num_char }
    void parse_opcode() {
        parse_letter();
        while (isUTF8Alpha() || isDecDigit()) ++pos;
    }

    /// operand_list = operand { ',' ws_optional operand }
    void parse_operand_list() {
        parse_operand();
        repeat([this] {
            match_char(',');
            parse_ws_optional();
            parse_operand();
        });
    }

    /// instruction = opcode [ whitespace operand_list ]
    void parse_instruction() {
        parse_opcode();
        // Rewinds past the whitespace too when no operand follows it.
        attempt([this] {
            parse_whitespace();
            parse_operand_list();
        });
    }

    /// include_decl = "include" whitespace string_lit
    void parse_include_decl() {
        match_string("include");
        parse_whitespace();
        parse_string_lit();
    }

    // ------------------------------------------------------------ annotations

    /// annotation_oper = identifier [ ws_optional '=' ws_optional literal_decl ]
    void parse_annotation_oper() {
        parse_identifier();
        attempt([this] {
            parse_ws_optional();
            match_char('=');
            parse_ws_optional();
            parse_literal_decl();
        });
    }

    /// annotation_ops = annotation_oper
    ///                  { ws_optional ',' ws_optional annotation_oper }
    void parse_annotation_ops() {
        parse_annotation_oper();
        repeat([this] {
            parse_ws_optional();
            match_char(',');
            parse_ws_optional();
            parse_annotation_oper();
        });
    }

    /// annotation_args = '(' ws_optional annotation_ops ws_optional ')'
    void parse_annotation_args() {
        match_char('(');
        parse_ws_optional();
        parse_annotation_ops();
        parse_ws_optional();
        match_char(')');
    }

    /// annotation = '@' identifier [ annotation_args ]
    void parse_annotation() {
        match_char('@');
        parse_identifier();
        try_rule(&AssemblyParser::parse_annotation_args);
    }

    // ------------------------------------------------------------------ lines

    /// section_decl = "section" whitespace '.' identifier
    void parse_section_decl() {
        match_string("section");
        parse_whitespace();
        match_char('.');
        parse_identifier();
    }

    /// label = identifier ':'
    void parse_label() {
        parse_identifier();
        match_char(':');
    }

    /// line_content = include_decl | section_decl
    ///              | [annotation whitespace] [label ws_optional] [instruction]
    /// The third alternative can match the empty string, so this never throws.
    void parse_line_content() {
        longest_of({&AssemblyParser::parse_include_decl,
                    &AssemblyParser::parse_section_decl,
                    &AssemblyParser::parse_plain_line_content});
    }

    /// line = ws_optional [ line_content ] ws_optional [ comment ] newline
    void parse_line() {
        parse_ws_optional();
        try_rule(&AssemblyParser::parse_line_content);
        parse_ws_optional();
        if (peek() == ';') parse_comment();
        parse_newline();
    }

    /// line_last = ws_optional [ line_content ] ws_optional [ comment ]
    void parse_line_last() {
        parse_ws_optional();
        try_rule(&AssemblyParser::parse_line_content);
        parse_ws_optional();
        if (peek() == ';') parse_comment();
    }

    /// program = { line } [ line_last ]
    void parse_program() {
        // Every successful line consumes its newline, so the loop always
        // makes progress and ends at the first line without a terminator.
        while (!at_end() && try_rule(&AssemblyParser::parse_line)) {
        }
        try_rule(&AssemblyParser::parse_line_last);
    }
};
|