Started working on a decompiler, called 'bytecode inspector'

It only has a few instructions for now, but I can flesh it out over
time.
This commit is contained in:
2026-04-11 13:37:26 +10:00
parent 49a825aaf9
commit b32ea9f309
5 changed files with 273 additions and 97 deletions

178
repl/bytecode_inspector.c Normal file
View File

@@ -0,0 +1,178 @@
#include "bytecode_inspector.h"
#include "toy_console_colors.h"
#include "toy_opcodes.h"
#include "toy_value.h"
#include "toy_string.h"
#include <stdio.h>
#include <stdlib.h>
int inspect_instruction(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr);
int inspect_read(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr);
// void inspect_jumps(unsigned char* bytecode, unsigned int pc, unsigned int size);
// void inspect_param(unsigned char* bytecode, unsigned int pc, unsigned int size);
// void inspect_data(unsigned char* bytecode, unsigned int pc, unsigned int size);
// void inspect_subs(unsigned char* bytecode, unsigned int pc, unsigned int size);
#define MARKER_VALUE(pc, type) \
(pc * sizeof(type))
#define MARKER "\033[" TOY_CC_FONT_BLACK "m" " %lu\t" TOY_CC_RESET
//exposed functions
void inspect_bytecode(unsigned char* bytecode) {
//TODO: handle version info
unsigned int const header_size = 0;
unsigned int const header_jumps = 1;
unsigned int const header_param = 2;
unsigned int const header_data = 3;
unsigned int const header_subs = 4;
//header size
printf(MARKER TOY_CC_NOTICE "Bytecode Size: \t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_size, unsigned int), ((unsigned int*)(bytecode))[header_size]);
//header counts
printf(MARKER TOY_CC_NOTICE "Jumps Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_jumps, unsigned int), ((unsigned int*)(bytecode))[header_jumps]);
printf(MARKER TOY_CC_NOTICE "Param Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_param, unsigned int), ((unsigned int*)(bytecode))[header_param]);
printf(MARKER TOY_CC_NOTICE "Data Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_data, unsigned int), ((unsigned int*)(bytecode))[header_data]);
printf(MARKER TOY_CC_NOTICE "Subs Size:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(header_subs, unsigned int), ((unsigned int*)(bytecode))[header_subs]);
printf("\n---\n");
//some addresses may be absent
unsigned int addr_pc = 4;
unsigned int code_addr = 0;
unsigned int jumps_addr = 0;
unsigned int param_addr = 0;
unsigned int data_addr = 0;
unsigned int subs_addr = 0;
//header addresses
if (true) {
addr_pc++;
printf(MARKER TOY_CC_NOTICE "Code Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), ((unsigned int*)(bytecode))[addr_pc]);
code_addr = ((unsigned int*)(bytecode))[addr_pc];
}
if (((unsigned int*)(bytecode))[header_jumps] > 0) {
addr_pc++;
printf(MARKER TOY_CC_NOTICE "Jumps Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), ((unsigned int*)(bytecode))[addr_pc]);
jumps_addr = ((unsigned int*)(bytecode))[addr_pc];
}
if (((unsigned int*)(bytecode))[header_param] > 0) {
addr_pc++;
printf(MARKER TOY_CC_NOTICE "Param Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), ((unsigned int*)(bytecode))[addr_pc]);
param_addr = ((unsigned int*)(bytecode))[addr_pc];
}
if (((unsigned int*)(bytecode))[header_data] > 0) {
addr_pc++;
printf(MARKER TOY_CC_NOTICE "Data Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), ((unsigned int*)(bytecode))[addr_pc]);
data_addr = ((unsigned int*)(bytecode))[addr_pc];
}
if (((unsigned int*)(bytecode))[header_subs] > 0) {
addr_pc++;
printf(MARKER TOY_CC_NOTICE "Subs Address:\t\t%u" TOY_CC_RESET "\n", MARKER_VALUE(addr_pc, unsigned int), ((unsigned int*)(bytecode))[addr_pc]);
subs_addr = ((unsigned int*)(bytecode))[addr_pc];
}
printf("\n---\n");
//read the code instructions one-by-one
unsigned int pc = code_addr;
while(pc < jumps_addr) {
pc += inspect_instruction(bytecode, pc, jumps_addr, data_addr);
}
(void)jumps_addr;
(void)param_addr;
(void)data_addr;
(void)subs_addr;
}
int inspect_instruction(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr) {
//read and print the opcode instruction at 'pc'
Toy_OpcodeType opcode = bytecode[pc];
switch(opcode) {
case TOY_OPCODE_READ:
return inspect_read(bytecode, pc, jumps_addr, data_addr);
case TOY_OPCODE_RETURN:
printf(MARKER "Keyword RETURN (%u)\n", MARKER_VALUE(pc, unsigned char), bytecode[pc + 1]);
return 4;
case TOY_OPCODE_PRINT:
printf(MARKER "Keyword PRINT\n", MARKER_VALUE(pc, unsigned char));
return 4;
default:
printf(MARKER TOY_CC_WARN "Unknown Word: [%u, %u, %u, %u]" TOY_CC_RESET "\n", MARKER_VALUE(pc, unsigned char), bytecode[pc], bytecode[pc+1], bytecode[pc+2], bytecode[pc+3]);
return 4;
}
}
int inspect_read(unsigned char* bytecode, unsigned int pc, unsigned int jumps_addr, unsigned int data_addr) {
Toy_ValueType type = bytecode[pc + 1];
switch(type) {
case TOY_VALUE_NULL: {
printf(MARKER "READ NULL\n", MARKER_VALUE(pc, unsigned char));
return 4;
}
case TOY_VALUE_BOOLEAN: {
if (bytecode[pc + 2]) {
printf(MARKER "READ BOOL true\n", MARKER_VALUE(pc, unsigned char));
}
else {
}
return 4;
}
case TOY_VALUE_INTEGER: {
int i = *(int*)(bytecode + pc + 4);
printf(MARKER "READ INTEGER %d\n", MARKER_VALUE(pc, unsigned char), i);
return 8;
}
case TOY_VALUE_FLOAT: {
float i = *(float*)(bytecode + pc + 4);
printf(MARKER "READ FLOAT %f\n", MARKER_VALUE(pc, unsigned char), i);
return 8;
}
case TOY_VALUE_STRING: {
Toy_StringType stringType = (Toy_StringType)(*(bytecode + pc + 2)); //TODO: not needed?
int len = bytecode[pc + 3]; //only used for names?
(void)stringType;
unsigned int indexValue = *((unsigned int*)(bytecode + pc + 4));
unsigned int jumpValue = *((unsigned int*)(bytecode + jumps_addr + indexValue));
char* cstr = ((char*)(bytecode + data_addr + jumpValue));
printf(MARKER "READ STRING (%d) %s\n", MARKER_VALUE(pc, unsigned char), len, cstr);
return 8;
}
case TOY_VALUE_ARRAY:
case TOY_VALUE_TABLE:
case TOY_VALUE_FUNCTION:
case TOY_VALUE_OPAQUE:
case TOY_VALUE_ANY:
case TOY_VALUE_UNKNOWN:
default: {
printf(MARKER "READ ???\n", MARKER_VALUE(pc, unsigned char));
return 4;
}
}
}