Started working on a decompiler, called 'bytecode inspector'

It only has a few instructions for now, but I can flesh it out over
time.
This commit is contained in:
2026-04-11 13:37:26 +10:00
parent 49a825aaf9
commit b32ea9f309
5 changed files with 273 additions and 97 deletions

178
repl/bytecode_inspector.c Normal file
View File

@@ -0,0 +1,178 @@
#include "bytecode_inspector.h"
#include "toy_console_colors.h"
#include "toy_opcodes.h"
#include "toy_value.h"
#include "toy_string.h"
#include <stdio.h>
#include <stdlib.h>
//forward declarations of the helpers defined later in this file
//each one prints a single instruction and returns the number of bytes to advance 'pc' by
int inspect_instruction(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr);
int inspect_read(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr);
//NOTE(review): the prototypes below are commented out and have no definitions in this file - presumably placeholders for per-section inspectors still to be written
// void inspect_jumps(unsigned char* bytecode, unsigned int pc, unsigned int size);
// void inspect_param(unsigned char* bytecode, unsigned int pc, unsigned int size);
// void inspect_data(unsigned char* bytecode, unsigned int pc, unsigned int size);
// void inspect_subs(unsigned char* bytecode, unsigned int pc, unsigned int size);
//MARKER_VALUE: byte offset of element number 'pc' in an array of 'type', for use as the address column
//'pc' is parenthesized so expressions like 'a + b' scale as a whole
//the result is cast to unsigned long so it always matches the "%lu" conversion in MARKER, even where size_t is not unsigned long
#define MARKER_VALUE(pc, type) \
	((unsigned long)((pc) * sizeof(type)))

//MARKER: printf format prefix that prints the address column (one "%lu" argument) followed by a tab, then resets the colour
#define MARKER "\033[" TOY_CC_FONT_BLACK "m" " %lu\t" TOY_CC_RESET
//exposed functions
/*
 * Prints a human-readable dump of a compiled bytecode buffer to stdout.
 *
 * The buffer is read as an array of unsigned ints for the header:
 *   [0] bytecode size, [1] jumps size, [2] param size, [3] data size, [4] subs size,
 * followed by one address per section, starting with the code address. The address
 * of a section is only present in the header when that section's size is non-zero.
 *
 * After the header, every instruction in the code section is printed in order.
 *
 * bytecode: the buffer to inspect; NULL is ignored.
 */
void inspect_bytecode(unsigned char* bytecode) {
	if (bytecode == NULL) {
		return;
	}

	//TODO: handle version info

	//indexes of the fixed header fields
	unsigned int const header_size = 0;
	unsigned int const header_jumps = 1;
	unsigned int const header_param = 2;
	unsigned int const header_data = 3;
	unsigned int const header_subs = 4;

	//the header is addressed as unsigned ints, the instructions as raw bytes
	const unsigned int* header = (const unsigned int*)bytecode;

	//header size
	printf(MARKER TOY_CC_NOTICE "Bytecode Size: \t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_size, unsigned int), header[header_size]);

	//header counts
	printf(MARKER TOY_CC_NOTICE "Jumps Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_jumps, unsigned int), header[header_jumps]);
	printf(MARKER TOY_CC_NOTICE "Param Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_param, unsigned int), header[header_param]);
	printf(MARKER TOY_CC_NOTICE "Data Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_data, unsigned int), header[header_data]);
	printf(MARKER TOY_CC_NOTICE "Subs Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_subs, unsigned int), header[header_subs]);

	printf("\n---\n");

	//some addresses may be absent, in which case they stay at zero
	unsigned int addr_pc = 4; //index of the last header field consumed so far
	unsigned int code_addr = 0;
	unsigned int jumps_addr = 0;
	unsigned int param_addr = 0;
	unsigned int data_addr = 0;
	unsigned int subs_addr = 0;

	//header addresses; the code address is always present
	addr_pc++;
	printf(MARKER TOY_CC_NOTICE "Code Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), header[addr_pc]);
	code_addr = header[addr_pc];

	if (header[header_jumps] > 0) {
		addr_pc++;
		printf(MARKER TOY_CC_NOTICE "Jumps Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), header[addr_pc]);
		jumps_addr = header[addr_pc];
	}

	if (header[header_param] > 0) {
		addr_pc++;
		printf(MARKER TOY_CC_NOTICE "Param Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), header[addr_pc]);
		param_addr = header[addr_pc];
	}

	if (header[header_data] > 0) {
		addr_pc++;
		printf(MARKER TOY_CC_NOTICE "Data Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), header[addr_pc]);
		data_addr = header[addr_pc];
	}

	if (header[header_subs] > 0) {
		addr_pc++;
		printf(MARKER TOY_CC_NOTICE "Subs Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), header[addr_pc]);
		subs_addr = header[addr_pc];
	}

	printf("\n---\n");

	//find where the code section ends: the lowest address of any section that is present
	//bounding the loop by 'jumps_addr' alone would print nothing for bytecode without a jumps section, as that address is then zero
	//NOTE(review): falls back to the header's size field when no other section exists - assumes that field is the total byte size of the buffer, confirm against the compiler
	unsigned int code_end = header[header_size];
	const unsigned int section_addrs[] = { jumps_addr, param_addr, data_addr, subs_addr };

	for (unsigned int i = 0; i < sizeof(section_addrs) / sizeof(section_addrs[0]); i++) {
		if (section_addrs[i] != 0 && section_addrs[i] < code_end) {
			code_end = section_addrs[i];
		}
	}

	//read the code instructions one-by-one; each call reports how many bytes it consumed
	unsigned int pc = code_addr;
	while (pc < code_end) {
		pc += inspect_instruction(bytecode, pc, jumps_addr, data_addr);
	}
}
/*
 * Prints the single instruction found at byte offset 'pc' and reports its width.
 *
 * READ instructions are delegated to inspect_read(); opcodes that are not
 * recognised here are printed as their four raw bytes.
 *
 * Returns the number of bytes the caller should advance 'pc' by.
 */
int inspect_instruction(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr) {
	//every instruction handled directly in this function occupies one 4-byte word
	int consumed = 4;

	const unsigned char* word = bytecode + pc;
	Toy_OpcodeType op = word[0];

	if (op == TOY_OPCODE_READ) {
		//the width of a READ depends on the value type, so let the helper decide
		consumed = inspect_read(bytecode, pc, jumps_addr, data_addr);
	}
	else if (op == TOY_OPCODE_RETURN) {
		//the byte after the opcode is shown in the parentheses
		printf(MARKER "Keyword RETURN (%u)\n", MARKER_VALUE(pc, unsigned char), word[1]);
	}
	else if (op == TOY_OPCODE_PRINT) {
		printf(MARKER "Keyword PRINT\n", MARKER_VALUE(pc, unsigned char));
	}
	else {
		printf(MARKER TOY_CC_WARN "Unknown Word: [%u, %u, %u, %u]" TOY_CC_RESET "\n", MARKER_VALUE(pc, unsigned char), word[0], word[1], word[2], word[3]);
	}

	return consumed;
}
/*
 * Prints the READ instruction found at byte offset 'pc' and reports its width.
 *
 * Layout as decoded here:
 *   pc + 0  opcode
 *   pc + 1  value type
 *   pc + 2  boolean value (BOOLEAN) or string type (STRING)
 *   pc + 3  string length (STRING)
 *   pc + 4  4-byte payload (INTEGER, FLOAT and STRING only)
 *
 * For strings the payload is a byte offset into the jumps section, and the entry
 * found there is a byte offset into the data section, where the characters are
 * read as a C string.
 *
 * Returns the number of bytes consumed: 8 when a payload word follows, otherwise 4.
 */
int inspect_read(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr) {
	Toy_ValueType type = bytecode[pc + 1];

	switch(type) {
		case TOY_VALUE_NULL: {
			printf(MARKER "READ NULL\n", MARKER_VALUE(pc, unsigned char));
			return 4;
		}

		case TOY_VALUE_BOOLEAN: {
			if (bytecode[pc + 2]) {
				printf(MARKER "READ BOOL true\n", MARKER_VALUE(pc, unsigned char));
			}
			else {
				//a false literal must show up in the listing too
				printf(MARKER "READ BOOL false\n", MARKER_VALUE(pc, unsigned char));
			}
			return 4;
		}

		case TOY_VALUE_INTEGER: {
			int i = *(int*)(bytecode + pc + 4);
			printf(MARKER "READ INTEGER %d\n", MARKER_VALUE(pc, unsigned char), i);
			return 8;
		}

		case TOY_VALUE_FLOAT: {
			float f = *(float*)(bytecode + pc + 4);
			printf(MARKER "READ FLOAT %f\n", MARKER_VALUE(pc, unsigned char), f);
			return 8;
		}

		case TOY_VALUE_STRING: {
			//the byte at pc + 2 holds the string type, which is not needed for printing
			int len = bytecode[pc + 3]; //only used for names?

			//follow the indirection: payload -> jumps entry -> data offset
			unsigned int indexValue = *((unsigned int*)(bytecode + pc + 4));
			unsigned int jumpValue = *((unsigned int*)(bytecode + jumps_addr + indexValue));
			char* cstr = ((char*)(bytecode + data_addr + jumpValue));

			printf(MARKER "READ STRING (%d) %s\n", MARKER_VALUE(pc, unsigned char), len, cstr);
			return 8;
		}

		//not decoded yet
		case TOY_VALUE_ARRAY:
		case TOY_VALUE_TABLE:
		case TOY_VALUE_FUNCTION:
		case TOY_VALUE_OPAQUE:
		case TOY_VALUE_ANY:
		case TOY_VALUE_UNKNOWN:
		default: {
			printf(MARKER "READ ???\n", MARKER_VALUE(pc, unsigned char));
			return 4;
		}
	}
}

View File

@@ -0,0 +1,3 @@
#pragma once
//prints the header fields of 'bytecode', followed by a listing of its code instructions, to stdout
void inspect_bytecode(unsigned char* bytecode);

View File

@@ -1,3 +1,5 @@
#include "bytecode_inspector.h"
#include "toy_console_colors.h" #include "toy_console_colors.h"
#include "toy_lexer.h" #include "toy_lexer.h"
@@ -127,7 +129,7 @@ typedef struct CmdLine {
bool silentPrint; bool silentPrint;
bool silentAssert; bool silentAssert;
bool removeAssert; bool removeAssert;
bool verboseDebugPrint; bool verbose;
} CmdLine; } CmdLine;
void usageCmdLine(int argc, const char* argv[]) { void usageCmdLine(int argc, const char* argv[]) {
@@ -189,7 +191,7 @@ CmdLine parseCmdLine(int argc, const char* argv[]) {
.silentPrint = false, .silentPrint = false,
.silentAssert = false, .silentAssert = false,
.removeAssert = false, .removeAssert = false,
.verboseDebugPrint = false, .verbose = false,
}; };
for (int i = 1; i < argc; i++) { for (int i = 1; i < argc; i++) {
@@ -239,7 +241,7 @@ CmdLine parseCmdLine(int argc, const char* argv[]) {
} }
else if (!strcmp(argv[i], "-d") || !strcmp(argv[i], "--verbose")) { else if (!strcmp(argv[i], "-d") || !strcmp(argv[i], "--verbose")) {
cmd.verboseDebugPrint = true; cmd.verbose = true;
} }
else { else {
@@ -250,80 +252,6 @@ CmdLine parseCmdLine(int argc, const char* argv[]) {
return cmd; return cmd;
} }
/*
 * repl function
 *
 * Reads lines from stdin and, for each one, lexes, parses, compiles and runs it
 * on a single VM that is reused between lines. Blank lines and lines that fail
 * to parse just re-show the prompt; "exit" or "quit" (or end of input) ends the loop.
 *
 * filepath: passed to getFileName() to build the prompt text.
 *
 * Always returns 0.
 */
int repl(const char* filepath) {
	//output options
	Toy_setPrintCallback(printCallback);
	Toy_setErrorCallback(errorAndContinueCallback);
	Toy_setAssertFailureCallback(assertFailureAndContinueCallback);

	//vars to use
	char prompt[256];
	getFileName(prompt, filepath);

	//a compile-time constant, so the buffer is a plain array rather than a VLA
	enum { INPUT_BUFFER_SIZE = 4096 };
	char inputBuffer[INPUT_BUFFER_SIZE];
	memset(inputBuffer, 0, INPUT_BUFFER_SIZE);

	Toy_Bucket* bucket = Toy_allocateBucket(TOY_BUCKET_IDEAL);

	Toy_VM vm;
	Toy_initVM(&vm);

	printf("%s> ", prompt); //shows the terminal prompt and begin

	unsigned int runCount = 0; //used for initial preserveScope

	//read from the terminal
	while(fgets(inputBuffer, INPUT_BUFFER_SIZE, stdin)) {
		//work around fgets() adding a newline
		//the length check stops a line that begins with a NUL byte from indexing inputBuffer[-1]
		size_t length = strlen(inputBuffer);
		if (length > 0 && inputBuffer[length - 1] == '\n') {
			inputBuffer[--length] = '\0';
		}

		//nothing but whitespace
		if (length == 0 || !inputBuffer[ strspn(inputBuffer, " \r\n\t") ]) {
			printf("%s> ", prompt); //shows the terminal prompt and restart
			continue;
		}

		//end
		if (length == 4 && (strncmp(inputBuffer, "exit", 4) == 0 || strncmp(inputBuffer, "quit", 4) == 0)) {
			break;
		}

		//parse the input, prep the VM for execution
		Toy_Lexer lexer;
		Toy_bindLexer(&lexer, inputBuffer);
		Toy_Parser parser;
		Toy_bindParser(&parser, &lexer);
		Toy_Ast* ast = Toy_scanParser(&bucket, &parser); //Ast is in the bucket, so it doesn't need to be freed

		//parsing error, retry
		if (parser.error) {
			printf("%s> ", prompt); //shows the terminal prompt
			continue;
		}

		unsigned char* bytecode = Toy_compileToBytecode(ast);
		Toy_bindVM(&vm, bytecode, runCount++ > 0);

		//run
		Toy_runVM(&vm);

		//free the memory, and leave the VM ready for the next loop
		Toy_resetVM(&vm, true);
		free(bytecode);

		printf("%s> ", prompt); //shows the terminal prompt
	}

	//cleanup all memory
	Toy_freeVM(&vm);
	Toy_freeBucket(&bucket);

	return 0;
}
//debugging //debugging
static void debugStackPrint(Toy_Stack* stack) { static void debugStackPrint(Toy_Stack* stack) {
//DEBUG: if there's anything on the stack, print it //DEBUG: if there's anything on the stack, print it
@@ -385,6 +313,87 @@ static void debugScopePrint(Toy_Scope* scope, int depth) {
} }
} }
/*
 * repl function
 *
 * Reads lines from stdin and, for each one, lexes, parses, compiles and runs it
 * on a single VM that is reused between lines. Blank lines and lines that fail
 * to parse just re-show the prompt; "exit" or "quit" (or end of input) ends the loop.
 *
 * filepath: passed to getFileName() to build the prompt text.
 * verbose:  when true, the stack, the scope and a listing of the bytecode are
 *           printed after each line has run.
 *
 * Always returns 0.
 */
int repl(const char* filepath, bool verbose) {
	//output options
	Toy_setPrintCallback(printCallback);
	Toy_setErrorCallback(errorAndContinueCallback);
	Toy_setAssertFailureCallback(assertFailureAndContinueCallback);

	//vars to use
	char prompt[256];
	getFileName(prompt, filepath);

	//a compile-time constant, so the buffer is a plain array rather than a VLA
	enum { INPUT_BUFFER_SIZE = 4096 };
	char inputBuffer[INPUT_BUFFER_SIZE];
	memset(inputBuffer, 0, INPUT_BUFFER_SIZE);

	Toy_Bucket* bucket = Toy_allocateBucket(TOY_BUCKET_IDEAL);

	Toy_VM vm;
	Toy_initVM(&vm);

	printf("%s> ", prompt); //shows the terminal prompt and begin

	unsigned int runCount = 0; //used for initial preserveScope

	//read from the terminal
	while(fgets(inputBuffer, INPUT_BUFFER_SIZE, stdin)) {
		//work around fgets() adding a newline
		//the length check stops a line that begins with a NUL byte from indexing inputBuffer[-1]
		size_t length = strlen(inputBuffer);
		if (length > 0 && inputBuffer[length - 1] == '\n') {
			inputBuffer[--length] = '\0';
		}

		//nothing but whitespace
		if (length == 0 || !inputBuffer[ strspn(inputBuffer, " \r\n\t") ]) {
			printf("%s> ", prompt); //shows the terminal prompt and restart
			continue;
		}

		//end
		if (length == 4 && (strncmp(inputBuffer, "exit", 4) == 0 || strncmp(inputBuffer, "quit", 4) == 0)) {
			break;
		}

		//parse the input, prep the VM for execution
		Toy_Lexer lexer;
		Toy_bindLexer(&lexer, inputBuffer);
		Toy_Parser parser;
		Toy_bindParser(&parser, &lexer);
		Toy_Ast* ast = Toy_scanParser(&bucket, &parser); //Ast is in the bucket, so it doesn't need to be freed

		//parsing error, retry
		if (parser.error) {
			printf("%s> ", prompt); //shows the terminal prompt
			continue;
		}

		unsigned char* bytecode = Toy_compileToBytecode(ast);
		Toy_bindVM(&vm, bytecode, runCount++ > 0);

		//run
		Toy_runVM(&vm);

		//print the debug info
		if (verbose) {
			debugStackPrint(vm.stack);
			debugScopePrint(vm.scope, 0);
			inspect_bytecode(bytecode);
		}

		//free the memory, and leave the VM ready for the next loop
		Toy_resetVM(&vm, true);
		free(bytecode);

		printf("%s> ", prompt); //shows the terminal prompt
	}

	//cleanup all memory
	Toy_freeVM(&vm);
	Toy_freeBucket(&bucket);

	return 0;
}
//main file //main file
int main(int argc, const char* argv[]) { int main(int argc, const char* argv[]) {
Toy_setPrintCallback(printCallback); Toy_setPrintCallback(printCallback);
@@ -462,9 +471,10 @@ int main(int argc, const char* argv[]) {
Toy_runVM(&vm); Toy_runVM(&vm);
//print the debug info //print the debug info
if (cmd.verboseDebugPrint) { //URGENT: 'verbose' option is mainly for the WIP elements, like decompiler if (cmd.verbose) {
debugStackPrint(vm.stack); debugStackPrint(vm.stack);
debugScopePrint(vm.scope, 0); debugScopePrint(vm.scope, 0);
inspect_bytecode(bytecode);
} }
//cleanup //cleanup
@@ -472,7 +482,7 @@ int main(int argc, const char* argv[]) {
free(bytecode); free(bytecode);
} }
else { else {
repl(argv[0]); repl(argv[0], cmd.verbose);
} }
return 0; return 0;

View File

@@ -1,20 +1,5 @@
/* print "hello world";
{
fn hello() {
print "Hello world";
}
hello();
}
*/
{
fn hello(arg) {
print arg;
}
hello("world");
}
print [];

View File

@@ -62,7 +62,7 @@ static void processRead(Toy_VM* vm) {
} }
case TOY_VALUE_STRING: { case TOY_VALUE_STRING: {
enum Toy_StringType stringType = READ_BYTE(vm); Toy_StringType stringType = READ_BYTE(vm);
int len = (int)READ_BYTE(vm); //WARN: only used for name strings int len = (int)READ_BYTE(vm); //WARN: only used for name strings
(void)len; (void)len;