mirror of
https://github.com/krgamestudios/Toy.git
synced 2026-04-15 14:54:07 +10:00
Started working on a decompiler, called 'bytecode inspector'
It only has a few instructions for now, but I can flesh it out over time.
This commit is contained in:
178
repl/bytecode_inspector.c
Normal file
178
repl/bytecode_inspector.c
Normal file
@@ -0,0 +1,178 @@
|
||||
#include "bytecode_inspector.h"
|
||||
#include "toy_console_colors.h"
|
||||
#include "toy_opcodes.h"
|
||||
#include "toy_value.h"
|
||||
#include "toy_string.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int inspect_instruction(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr);
|
||||
int inspect_read(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr);
|
||||
|
||||
// void inspect_jumps(unsigned char* bytecode, unsigned int pc, unsigned int size);
|
||||
// void inspect_param(unsigned char* bytecode, unsigned int pc, unsigned int size);
|
||||
// void inspect_data(unsigned char* bytecode, unsigned int pc, unsigned int size);
|
||||
// void inspect_subs(unsigned char* bytecode, unsigned int pc, unsigned int size);
|
||||
|
||||
//Byte offset of element number 'pc' in an array of 'type'.
//The argument is parenthesized so expressions like 'a + 1' scale correctly, and the
//result is converted to unsigned long so it matches the "%lu" conversion in MARKER.
#define MARKER_VALUE(pc, type) \
	((unsigned long)((pc) * sizeof(type)))

//printf prefix that shows the byte offset of the item being printed; consumes one unsigned long argument
#define MARKER "\033[" TOY_CC_FONT_BLACK "m" " %lu\t" TOY_CC_RESET
|
||||
|
||||
//exposed functions
|
||||
void inspect_bytecode(unsigned char* bytecode) {
|
||||
//TODO: handle version info
|
||||
|
||||
unsigned int const header_size = 0;
|
||||
unsigned int const header_jumps = 1;
|
||||
unsigned int const header_param = 2;
|
||||
unsigned int const header_data = 3;
|
||||
unsigned int const header_subs = 4;
|
||||
|
||||
//header size
|
||||
printf(MARKER TOY_CC_NOTICE "Bytecode Size: \t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_size, unsigned int), ((unsigned int*)(bytecode))[header_size]);
|
||||
|
||||
//header counts
|
||||
printf(MARKER TOY_CC_NOTICE "Jumps Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_jumps, unsigned int), ((unsigned int*)(bytecode))[header_jumps]);
|
||||
printf(MARKER TOY_CC_NOTICE "Param Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_param, unsigned int), ((unsigned int*)(bytecode))[header_param]);
|
||||
printf(MARKER TOY_CC_NOTICE "Data Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_data, unsigned int), ((unsigned int*)(bytecode))[header_data]);
|
||||
printf(MARKER TOY_CC_NOTICE "Subs Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_subs, unsigned int), ((unsigned int*)(bytecode))[header_subs]);
|
||||
|
||||
printf("\n---\n");
|
||||
|
||||
//some addresses may be absent
|
||||
unsigned int addr_pc = 4;
|
||||
unsigned int code_addr = 0;
|
||||
unsigned int jumps_addr = 0;
|
||||
unsigned int param_addr = 0;
|
||||
unsigned int data_addr = 0;
|
||||
unsigned int subs_addr = 0;
|
||||
|
||||
|
||||
//header addresses
|
||||
if (true) {
|
||||
addr_pc++;
|
||||
printf(MARKER TOY_CC_NOTICE "Code Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), ((unsigned int*)(bytecode))[addr_pc]);
|
||||
code_addr = ((unsigned int*)(bytecode))[addr_pc];
|
||||
}
|
||||
|
||||
if (((unsigned int*)(bytecode))[header_jumps] > 0) {
|
||||
addr_pc++;
|
||||
printf(MARKER TOY_CC_NOTICE "Jumps Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), ((unsigned int*)(bytecode))[addr_pc]);
|
||||
jumps_addr = ((unsigned int*)(bytecode))[addr_pc];
|
||||
}
|
||||
|
||||
if (((unsigned int*)(bytecode))[header_param] > 0) {
|
||||
addr_pc++;
|
||||
printf(MARKER TOY_CC_NOTICE "Param Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), ((unsigned int*)(bytecode))[addr_pc]);
|
||||
param_addr = ((unsigned int*)(bytecode))[addr_pc];
|
||||
}
|
||||
|
||||
if (((unsigned int*)(bytecode))[header_data] > 0) {
|
||||
addr_pc++;
|
||||
printf(MARKER TOY_CC_NOTICE "Data Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), ((unsigned int*)(bytecode))[addr_pc]);
|
||||
data_addr = ((unsigned int*)(bytecode))[addr_pc];
|
||||
}
|
||||
|
||||
if (((unsigned int*)(bytecode))[header_subs] > 0) {
|
||||
addr_pc++;
|
||||
printf(MARKER TOY_CC_NOTICE "Subs Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), ((unsigned int*)(bytecode))[addr_pc]);
|
||||
subs_addr = ((unsigned int*)(bytecode))[addr_pc];
|
||||
}
|
||||
|
||||
printf("\n---\n");
|
||||
|
||||
//read the code instructions one-by-one
|
||||
unsigned int pc = code_addr;
|
||||
while(pc < jumps_addr) {
|
||||
pc += inspect_instruction(bytecode, pc, jumps_addr, data_addr);
|
||||
}
|
||||
|
||||
(void)jumps_addr;
|
||||
(void)param_addr;
|
||||
(void)data_addr;
|
||||
(void)subs_addr;
|
||||
}
|
||||
|
||||
int inspect_instruction(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr) {
|
||||
//read and print the opcode instruction at 'pc'
|
||||
|
||||
Toy_OpcodeType opcode = bytecode[pc];
|
||||
|
||||
switch(opcode) {
|
||||
case TOY_OPCODE_READ:
|
||||
return inspect_read(bytecode, pc, jumps_addr, data_addr);
|
||||
|
||||
case TOY_OPCODE_RETURN:
|
||||
printf(MARKER "Keyword RETURN (%u)\n", MARKER_VALUE(pc, unsigned char), bytecode[pc + 1]);
|
||||
return 4;
|
||||
|
||||
case TOY_OPCODE_PRINT:
|
||||
printf(MARKER "Keyword PRINT\n", MARKER_VALUE(pc, unsigned char));
|
||||
return 4;
|
||||
|
||||
default:
|
||||
printf(MARKER TOY_CC_WARN "Unknown Word: [%u, %u, %u, %u]" TOY_CC_RESET "\n", MARKER_VALUE(pc, unsigned char), bytecode[pc], bytecode[pc+1], bytecode[pc+2], bytecode[pc+3]);
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
int inspect_read(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr) {
|
||||
Toy_ValueType type = bytecode[pc + 1];
|
||||
|
||||
switch(type) {
|
||||
case TOY_VALUE_NULL: {
|
||||
printf(MARKER "READ NULL\n", MARKER_VALUE(pc, unsigned char));
|
||||
return 4;
|
||||
}
|
||||
|
||||
case TOY_VALUE_BOOLEAN: {
|
||||
if (bytecode[pc + 2]) {
|
||||
printf(MARKER "READ BOOL true\n", MARKER_VALUE(pc, unsigned char));
|
||||
}
|
||||
else {
|
||||
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
|
||||
case TOY_VALUE_INTEGER: {
|
||||
int i = *(int*)(bytecode + pc + 4);
|
||||
printf(MARKER "READ INTEGER %d\n", MARKER_VALUE(pc, unsigned char), i);
|
||||
return 8;
|
||||
}
|
||||
|
||||
case TOY_VALUE_FLOAT: {
|
||||
float i = *(float*)(bytecode + pc + 4);
|
||||
printf(MARKER "READ FLOAT %f\n", MARKER_VALUE(pc, unsigned char), i);
|
||||
return 8;
|
||||
}
|
||||
|
||||
case TOY_VALUE_STRING: {
|
||||
Toy_StringType stringType = (Toy_StringType)(*(bytecode + pc + 2)); //TODO: not needed?
|
||||
int len = bytecode[pc + 3]; //only used for names?
|
||||
|
||||
(void)stringType;
|
||||
|
||||
unsigned int indexValue = *((unsigned int*)(bytecode + pc + 4));
|
||||
unsigned int jumpValue = *((unsigned int*)(bytecode + jumps_addr + indexValue));
|
||||
char* cstr = ((char*)(bytecode + data_addr + jumpValue));
|
||||
|
||||
printf(MARKER "READ STRING (%d) %s\n", MARKER_VALUE(pc, unsigned char), len, cstr);
|
||||
|
||||
return 8;
|
||||
}
|
||||
|
||||
case TOY_VALUE_ARRAY:
|
||||
case TOY_VALUE_TABLE:
|
||||
case TOY_VALUE_FUNCTION:
|
||||
case TOY_VALUE_OPAQUE:
|
||||
case TOY_VALUE_ANY:
|
||||
case TOY_VALUE_UNKNOWN:
|
||||
default: {
|
||||
printf(MARKER "READ ???\n", MARKER_VALUE(pc, unsigned char));
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
3
repl/bytecode_inspector.h
Normal file
3
repl/bytecode_inspector.h
Normal file
@@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
|
||||
void inspect_bytecode(unsigned char* bytecode);
|
||||
168
repl/main.c
168
repl/main.c
@@ -1,3 +1,5 @@
|
||||
#include "bytecode_inspector.h"
|
||||
|
||||
#include "toy_console_colors.h"
|
||||
|
||||
#include "toy_lexer.h"
|
||||
@@ -127,7 +129,7 @@ typedef struct CmdLine {
|
||||
bool silentPrint;
|
||||
bool silentAssert;
|
||||
bool removeAssert;
|
||||
bool verboseDebugPrint;
|
||||
bool verbose;
|
||||
} CmdLine;
|
||||
|
||||
void usageCmdLine(int argc, const char* argv[]) {
|
||||
@@ -189,7 +191,7 @@ CmdLine parseCmdLine(int argc, const char* argv[]) {
|
||||
.silentPrint = false,
|
||||
.silentAssert = false,
|
||||
.removeAssert = false,
|
||||
.verboseDebugPrint = false,
|
||||
.verbose = false,
|
||||
};
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
@@ -239,7 +241,7 @@ CmdLine parseCmdLine(int argc, const char* argv[]) {
|
||||
}
|
||||
|
||||
else if (!strcmp(argv[i], "-d") || !strcmp(argv[i], "--verbose")) {
|
||||
cmd.verboseDebugPrint = true;
|
||||
cmd.verbose = true;
|
||||
}
|
||||
|
||||
else {
|
||||
@@ -250,80 +252,6 @@ CmdLine parseCmdLine(int argc, const char* argv[]) {
|
||||
return cmd;
|
||||
}
|
||||
|
||||
//repl function
|
||||
int repl(const char* filepath) {
|
||||
//output options
|
||||
Toy_setPrintCallback(printCallback);
|
||||
Toy_setErrorCallback(errorAndContinueCallback);
|
||||
Toy_setAssertFailureCallback(assertFailureAndContinueCallback);
|
||||
|
||||
//vars to use
|
||||
char prompt[256];
|
||||
getFileName(prompt, filepath);
|
||||
unsigned int INPUT_BUFFER_SIZE = 4096;
|
||||
char inputBuffer[INPUT_BUFFER_SIZE];
|
||||
memset(inputBuffer, 0, INPUT_BUFFER_SIZE);
|
||||
|
||||
Toy_Bucket* bucket = Toy_allocateBucket(TOY_BUCKET_IDEAL);
|
||||
|
||||
Toy_VM vm;
|
||||
Toy_initVM(&vm);
|
||||
|
||||
printf("%s> ", prompt); //shows the terminal prompt and begin
|
||||
|
||||
unsigned int runCount = 0; //used for initial preserveScope
|
||||
|
||||
//read from the terminal
|
||||
while(fgets(inputBuffer, INPUT_BUFFER_SIZE, stdin)) {
|
||||
//work around fgets() adding a newline
|
||||
unsigned int length = strlen(inputBuffer);
|
||||
if (inputBuffer[length - 1] == '\n') {
|
||||
inputBuffer[--length] = '\0';
|
||||
}
|
||||
|
||||
if (length == 0 || !inputBuffer[ strspn(inputBuffer, " \r\n\t") ]) {
|
||||
printf("%s> ", prompt); //shows the terminal prompt and restart
|
||||
continue;
|
||||
}
|
||||
|
||||
//end
|
||||
if (strlen(inputBuffer) == 4 && (strncmp(inputBuffer, "exit", 4) == 0 || strncmp(inputBuffer, "quit", 4) == 0)) {
|
||||
break;
|
||||
}
|
||||
|
||||
//parse the input, prep the VM for execution
|
||||
Toy_Lexer lexer;
|
||||
Toy_bindLexer(&lexer, inputBuffer);
|
||||
Toy_Parser parser;
|
||||
Toy_bindParser(&parser, &lexer);
|
||||
Toy_Ast* ast = Toy_scanParser(&bucket, &parser); //Ast is in the bucket, so it doesn't need to be freed
|
||||
|
||||
//parsing error, retry
|
||||
if (parser.error) {
|
||||
printf("%s> ", prompt); //shows the terminal prompt
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned char* bytecode = Toy_compileToBytecode(ast);
|
||||
Toy_bindVM(&vm, bytecode, runCount++ > 0);
|
||||
|
||||
//run
|
||||
Toy_runVM(&vm);
|
||||
|
||||
//free the memory, and leave the VM ready for the next loop
|
||||
Toy_resetVM(&vm, true);
|
||||
free(bytecode);
|
||||
|
||||
printf("%s> ", prompt); //shows the terminal prompt
|
||||
}
|
||||
|
||||
//cleanup all memory
|
||||
Toy_freeVM(&vm);
|
||||
Toy_freeBucket(&bucket);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//debugging
|
||||
static void debugStackPrint(Toy_Stack* stack) {
|
||||
//DEBUG: if there's anything on the stack, print it
|
||||
@@ -385,6 +313,87 @@ static void debugScopePrint(Toy_Scope* scope, int depth) {
|
||||
}
|
||||
}
|
||||
|
||||
//repl function
|
||||
int repl(const char* filepath, bool verbose) {
|
||||
//output options
|
||||
Toy_setPrintCallback(printCallback);
|
||||
Toy_setErrorCallback(errorAndContinueCallback);
|
||||
Toy_setAssertFailureCallback(assertFailureAndContinueCallback);
|
||||
|
||||
//vars to use
|
||||
char prompt[256];
|
||||
getFileName(prompt, filepath);
|
||||
unsigned int INPUT_BUFFER_SIZE = 4096;
|
||||
char inputBuffer[INPUT_BUFFER_SIZE];
|
||||
memset(inputBuffer, 0, INPUT_BUFFER_SIZE);
|
||||
|
||||
Toy_Bucket* bucket = Toy_allocateBucket(TOY_BUCKET_IDEAL);
|
||||
|
||||
Toy_VM vm;
|
||||
Toy_initVM(&vm);
|
||||
|
||||
printf("%s> ", prompt); //shows the terminal prompt and begin
|
||||
|
||||
unsigned int runCount = 0; //used for initial preserveScope
|
||||
|
||||
//read from the terminal
|
||||
while(fgets(inputBuffer, INPUT_BUFFER_SIZE, stdin)) {
|
||||
//work around fgets() adding a newline
|
||||
unsigned int length = strlen(inputBuffer);
|
||||
if (inputBuffer[length - 1] == '\n') {
|
||||
inputBuffer[--length] = '\0';
|
||||
}
|
||||
|
||||
if (length == 0 || !inputBuffer[ strspn(inputBuffer, " \r\n\t") ]) {
|
||||
printf("%s> ", prompt); //shows the terminal prompt and restart
|
||||
continue;
|
||||
}
|
||||
|
||||
//end
|
||||
if (strlen(inputBuffer) == 4 && (strncmp(inputBuffer, "exit", 4) == 0 || strncmp(inputBuffer, "quit", 4) == 0)) {
|
||||
break;
|
||||
}
|
||||
|
||||
//parse the input, prep the VM for execution
|
||||
Toy_Lexer lexer;
|
||||
Toy_bindLexer(&lexer, inputBuffer);
|
||||
Toy_Parser parser;
|
||||
Toy_bindParser(&parser, &lexer);
|
||||
Toy_Ast* ast = Toy_scanParser(&bucket, &parser); //Ast is in the bucket, so it doesn't need to be freed
|
||||
|
||||
//parsing error, retry
|
||||
if (parser.error) {
|
||||
printf("%s> ", prompt); //shows the terminal prompt
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned char* bytecode = Toy_compileToBytecode(ast);
|
||||
Toy_bindVM(&vm, bytecode, runCount++ > 0);
|
||||
|
||||
//run
|
||||
Toy_runVM(&vm);
|
||||
|
||||
//print the debug info
|
||||
if (verbose) {
|
||||
debugStackPrint(vm.stack);
|
||||
debugScopePrint(vm.scope, 0);
|
||||
inspect_bytecode(bytecode);
|
||||
}
|
||||
|
||||
//free the memory, and leave the VM ready for the next loop
|
||||
Toy_resetVM(&vm, true);
|
||||
free(bytecode);
|
||||
|
||||
printf("%s> ", prompt); //shows the terminal prompt
|
||||
}
|
||||
|
||||
//cleanup all memory
|
||||
Toy_freeVM(&vm);
|
||||
Toy_freeBucket(&bucket);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//main file
|
||||
int main(int argc, const char* argv[]) {
|
||||
Toy_setPrintCallback(printCallback);
|
||||
@@ -462,9 +471,10 @@ int main(int argc, const char* argv[]) {
|
||||
Toy_runVM(&vm);
|
||||
|
||||
//print the debug info
|
||||
if (cmd.verboseDebugPrint) { //URGENT: 'verbose' option is mainly for the WIP elements, like decompiler
|
||||
if (cmd.verbose) {
|
||||
debugStackPrint(vm.stack);
|
||||
debugScopePrint(vm.scope, 0);
|
||||
inspect_bytecode(bytecode);
|
||||
}
|
||||
|
||||
//cleanup
|
||||
@@ -472,7 +482,7 @@ int main(int argc, const char* argv[]) {
|
||||
free(bytecode);
|
||||
}
|
||||
else {
|
||||
repl(argv[0]);
|
||||
repl(argv[0], cmd.verbose);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -1,20 +1,5 @@
|
||||
|
||||
|
||||
/*
|
||||
{
|
||||
fn hello() {
|
||||
print "Hello world";
|
||||
}
|
||||
hello();
|
||||
}
|
||||
*/
|
||||
|
||||
{
|
||||
fn hello(arg) {
|
||||
print arg;
|
||||
}
|
||||
|
||||
hello("world");
|
||||
}
|
||||
|
||||
print "hello world";
|
||||
|
||||
print [];
|
||||
@@ -62,7 +62,7 @@ static void processRead(Toy_VM* vm) {
|
||||
}
|
||||
|
||||
case TOY_VALUE_STRING: {
|
||||
enum Toy_StringType stringType = READ_BYTE(vm);
|
||||
Toy_StringType stringType = READ_BYTE(vm);
|
||||
int len = (int)READ_BYTE(vm); //WARN: only used for name strings
|
||||
(void)len;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user