Fixed a 'malformed assignment' issue; read more below

I've also added some support for compiler errors in general, but this
will be expanded on later.

I've also quickly added a valgrind option to the tests and found a few
leaks. I'll deal with these later.

Summary of changes:

* Clarified the lifetime of the bytecode in memory
* Erroneous routines exit without compiling
* Empty VMs don't run
* Added a check for malformed assignments
* Renamed "routine" to "module" within the VM
* VM no longer tries to free the bytecode - the caller must now free it manually
* Started experimenting with valgrind, not yet ready
This commit is contained in:
2024-11-16 21:02:37 +11:00
parent 04f0653595
commit 2f9489d5fd
12 changed files with 164 additions and 75 deletions

View File

@@ -57,11 +57,17 @@ static void writeBytecodeBody(Toy_Bytecode* bc, Toy_Ast* ast) {
//eventually, the bytecode may support multiple modules packed into one file
void* module = Toy_compileRoutine(ast);
//don't try writing an empty module
if (module == NULL) {
return;
}
size_t len = (size_t)(((int*)module)[0]);
expand(bc, len);
memcpy(bc->ptr + bc->count, module, len);
bc->count += len;
bc->moduleCount++;
}
//exposed functions
@@ -73,9 +79,11 @@ Toy_Bytecode Toy_compileBytecode(Toy_Ast* ast) {
bc.capacity = 0;
bc.count = 0;
bc.moduleCount = 0;
//build
writeBytecodeHeader(&bc);
writeBytecodeBody(&bc, ast);
writeBytecodeBody(&bc, ast); //TODO: implement module packing
return bc;
}

View File

@@ -7,6 +7,8 @@ typedef struct Toy_Bytecode {
unsigned char* ptr;
unsigned int capacity;
unsigned int count;
unsigned int moduleCount;
} Toy_Bytecode;
TOY_API Toy_Bytecode Toy_compileBytecode(Toy_Ast* ast);

View File

@@ -329,6 +329,22 @@ static unsigned int writeInstructionVarDeclare(Toy_Routine** rt, Toy_AstVarDecla
static unsigned int writeInstructionAssign(Toy_Routine** rt, Toy_AstVarAssign ast) {
unsigned int result = 0;
//don't treat these as valid values
switch (ast.expr->type) {
case TOY_AST_BLOCK:
case TOY_AST_COMPOUND:
case TOY_AST_ASSERT:
case TOY_AST_PRINT:
case TOY_AST_VAR_DECLARE:
//emit a compiler error, set the panic flag and skip out
fprintf(stderr, TOY_CC_ERROR "COMPILER ERROR: Invalid AST type found: Malformed assignment\n" TOY_CC_RESET);
(*rt)->panic = true;
return 0;
default:
break;
}
//name, duplicate, right, opcode
if (ast.flag == TOY_AST_FLAG_ASSIGN) {
EMIT_BYTE(rt, code, TOY_OPCODE_READ);
@@ -473,6 +489,11 @@ static unsigned int writeRoutineCode(Toy_Routine** rt, Toy_Ast* ast) {
return 0;
}
//if an error occurred, just exit
if (rt == NULL || (*rt) == NULL || (*rt)->panic) {
return 0;
}
unsigned int result = 0;
//determine how to write each instruction based on the Ast
@@ -542,19 +563,17 @@ static unsigned int writeRoutineCode(Toy_Routine** rt, Toy_Ast* ast) {
//meta instructions are disallowed
case TOY_AST_PASS:
//NOTE: this should be disallowed, but for now it's required for testing
// fprintf(stderr, TOY_CC_ERROR "ERROR: Invalid AST type found: Unknown pass\n" TOY_CC_RESET);
// exit(-1);
//NOTE: this *should* be disallowed, but for now it's required for testing
break;
case TOY_AST_ERROR:
fprintf(stderr, TOY_CC_ERROR "ERROR: Invalid AST type found: Unknown error\n" TOY_CC_RESET);
exit(-1);
fprintf(stderr, TOY_CC_ERROR "COMPILER ERROR: Invalid AST type found: Unknown 'error'\n" TOY_CC_RESET);
(*rt)->panic = true;
break;
case TOY_AST_END:
fprintf(stderr, TOY_CC_ERROR "ERROR: Invalid AST type found: Unknown end\n" TOY_CC_RESET);
exit(-1);
fprintf(stderr, TOY_CC_ERROR "COMPILER ERROR: Invalid AST type found: Unknown 'end'\n" TOY_CC_RESET);
(*rt)->panic = true;
break;
}
@@ -562,8 +581,6 @@ static unsigned int writeRoutineCode(Toy_Routine** rt, Toy_Ast* ast) {
}
static void* writeRoutine(Toy_Routine* rt, Toy_Ast* ast) {
//build the routine's parts
//TODO: param
//code
writeRoutineCode(&rt, ast);
EMIT_BYTE(&rt, code, TOY_OPCODE_RETURN); //temp terminator
@@ -571,6 +588,11 @@ static void* writeRoutine(Toy_Routine* rt, Toy_Ast* ast) {
EMIT_BYTE(&rt, code, 0);
EMIT_BYTE(&rt, code, 0);
//if an error occurred, just exit
if (rt->panic) {
return NULL;
}
//write the header and combine the parts
void* buffer = NULL;
unsigned int capacity = 0, count = 0;
@@ -667,10 +689,11 @@ void* Toy_compileRoutine(Toy_Ast* ast) {
rt.subsCapacity = 0;
rt.subsCount = 0;
rt.panic = false;
//build
void * buffer = writeRoutine(&rt, ast);
//cleanup the temp object
free(rt.param);
free(rt.code);

View File

@@ -24,6 +24,8 @@ typedef struct Toy_Routine {
unsigned char* subs; //subroutines, recursively
unsigned int subsCapacity;
unsigned int subsCount;
bool panic; //any issues found at this point are compilation errors
} Toy_Routine;
TOY_API void* Toy_compileRoutine(Toy_Ast* ast);

View File

@@ -12,16 +12,16 @@
//utilities
#define READ_BYTE(vm) \
vm->routine[vm->routineCounter++]
vm->module[vm->programCounter++]
#define READ_UNSIGNED_INT(vm) \
*((unsigned int*)(vm->routine + readPostfixUtil(&(vm->routineCounter), 4)))
*((unsigned int*)(vm->module + readPostfixUtil(&(vm->programCounter), 4)))
#define READ_INT(vm) \
*((int*)(vm->routine + readPostfixUtil(&(vm->routineCounter), 4)))
*((int*)(vm->module + readPostfixUtil(&(vm->programCounter), 4)))
#define READ_FLOAT(vm) \
*((float*)(vm->routine + readPostfixUtil(&(vm->routineCounter), 4)))
*((float*)(vm->module + readPostfixUtil(&(vm->programCounter), 4)))
static inline int readPostfixUtil(unsigned int* ptr, int amount) {
int ret = *ptr;
@@ -31,7 +31,7 @@ static inline int readPostfixUtil(unsigned int* ptr, int amount) {
static inline void fixAlignment(Toy_VM* vm) {
//NOTE: It's a tilde, not a negative sign
vm->routineCounter = (vm->routineCounter + 3) & ~0b11;
vm->programCounter = (vm->programCounter + 3) & ~0b11;
}
//instruction handlers
@@ -68,10 +68,10 @@ static void processRead(Toy_VM* vm) {
int len = (int)READ_BYTE(vm);
//grab the jump as an integer
unsigned int jump = vm->routine[ vm->jumpsAddr + READ_INT(vm) ];
unsigned int jump = vm->module[ vm->jumpsAddr + READ_INT(vm) ];
//jumps are relative to the data address
char* cstring = (char*)(vm->routine + vm->dataAddr + jump);
char* cstring = (char*)(vm->module + vm->dataAddr + jump);
//build a string from the data section
if (stringType == TOY_STRING_LEAF) {
@@ -142,10 +142,10 @@ static void processDeclare(Toy_VM* vm) {
bool constant = READ_BYTE(vm); //constness
//grab the jump
unsigned int jump = *(unsigned int*)(vm->routine + vm->jumpsAddr + READ_INT(vm));
unsigned int jump = *(unsigned int*)(vm->module + vm->jumpsAddr + READ_INT(vm));
//grab the data
char* cstring = (char*)(vm->routine + vm->dataAddr + jump);
char* cstring = (char*)(vm->module + vm->dataAddr + jump);
//build the name string
Toy_String* name = Toy_createNameStringLength(&vm->stringBucket, cstring, len, type, constant);
@@ -166,7 +166,7 @@ static void processAssign(Toy_VM* vm) {
Toy_Value name = Toy_popStack(&vm->stack);
//check name string type
if (!TOY_VALUE_IS_STRING(name) && TOY_VALUE_AS_STRING(name)->type != TOY_STRING_NAME) {
if (!TOY_VALUE_IS_STRING(name) || TOY_VALUE_AS_STRING(name)->type != TOY_STRING_NAME) {
Toy_error("Invalid assignment target");
return;
}
@@ -596,18 +596,18 @@ void Toy_initVM(Toy_VM* vm) {
Toy_resetVM(vm);
}
void Toy_bindVM(Toy_VM* vm, unsigned char* bytecode) {
if (bytecode[0] != TOY_VERSION_MAJOR || bytecode[1] > TOY_VERSION_MINOR) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Wrong bytecode version found: expected %d.%d.%d found %d.%d.%d, exiting\n" TOY_CC_RESET, TOY_VERSION_MAJOR, TOY_VERSION_MINOR, TOY_VERSION_PATCH, bytecode[0], bytecode[1], bytecode[2]);
void Toy_bindVM(Toy_VM* vm, struct Toy_Bytecode* bc) {
if (bc->ptr[0] != TOY_VERSION_MAJOR || bc->ptr[1] > TOY_VERSION_MINOR) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Wrong bytecode version found: expected %d.%d.%d found %d.%d.%d, exiting\n" TOY_CC_RESET, TOY_VERSION_MAJOR, TOY_VERSION_MINOR, TOY_VERSION_PATCH, bc->ptr[0], bc->ptr[1], bc->ptr[2]);
exit(-1);
}
if (bytecode[2] != TOY_VERSION_PATCH) {
fprintf(stderr, TOY_CC_WARN "WARNING: Wrong bytecode version found: expected %d.%d.%d found %d.%d.%d, continuing\n" TOY_CC_RESET, TOY_VERSION_MAJOR, TOY_VERSION_MINOR, TOY_VERSION_PATCH, bytecode[0], bytecode[1], bytecode[2]);
if (bc->ptr[2] != TOY_VERSION_PATCH) {
fprintf(stderr, TOY_CC_WARN "WARNING: Wrong bytecode version found: expected %d.%d.%d found %d.%d.%d, continuing\n" TOY_CC_RESET, TOY_VERSION_MAJOR, TOY_VERSION_MINOR, TOY_VERSION_PATCH, bc->ptr[0], bc->ptr[1], bc->ptr[2]);
}
if (strcmp((char*)(bytecode + 3), TOY_VERSION_BUILD) != 0) {
fprintf(stderr, TOY_CC_WARN "WARNING: Wrong bytecode build info found: expected '%s' found '%s', continuing\n" TOY_CC_RESET, TOY_VERSION_BUILD, (char*)(bytecode + 3));
if (strcmp((char*)(bc->ptr + 3), TOY_VERSION_BUILD) != 0) {
fprintf(stderr, TOY_CC_WARN "WARNING: Wrong bytecode build info found: expected '%s' found '%s', continuing\n" TOY_CC_RESET, TOY_VERSION_BUILD, (char*)(bc->ptr + 3));
}
//offset by the header size
@@ -616,18 +616,17 @@ void Toy_bindVM(Toy_VM* vm, unsigned char* bytecode) {
offset += 4 - (offset % 4); //ceil
}
//delegate
Toy_bindVMToRoutine(vm, bytecode + offset);
//cache these
vm->bc = bytecode;
if (bc->moduleCount != 0) { //tmp check, just in case the bytecode is empty; will rework this when module packing works
//delegate to a more specialized function
Toy_bindVMToModule(vm, bc->ptr + offset);
}
}
void Toy_bindVMToRoutine(Toy_VM* vm, unsigned char* routine) {
vm->routine = routine;
void Toy_bindVMToModule(Toy_VM* vm, unsigned char* module) {
vm->module = module;
//read the header metadata
vm->routineSize = READ_UNSIGNED_INT(vm);
vm->moduleSize = READ_UNSIGNED_INT(vm);
vm->paramSize = READ_UNSIGNED_INT(vm);
vm->jumpsSize = READ_UNSIGNED_INT(vm);
vm->dataSize = READ_UNSIGNED_INT(vm);
@@ -663,10 +662,15 @@ void Toy_bindVMToRoutine(Toy_VM* vm, unsigned char* routine) {
}
void Toy_runVM(Toy_VM* vm) {
//NO-OP on empty VMs
if (vm->module == NULL) {
return;
}
//TODO: read params into scope
//prep the routine counter for execution
vm->routineCounter = vm->codeAddr;
//prep the program counter for execution
vm->programCounter = vm->codeAddr;
//begin
process(vm);
@@ -679,17 +683,12 @@ void Toy_freeVM(Toy_VM* vm) {
Toy_freeBucket(&vm->stringBucket);
Toy_freeBucket(&vm->scopeBucket);
//free the bytecode
free(vm->bc);
Toy_resetVM(vm);
}
void Toy_resetVM(Toy_VM* vm) {
vm->bc = NULL;
vm->routine = NULL;
vm->routineSize = 0;
vm->module = NULL;
vm->moduleSize = 0;
vm->paramSize = 0;
vm->jumpsSize = 0;
@@ -702,7 +701,7 @@ void Toy_resetVM(Toy_VM* vm) {
vm->dataAddr = 0;
vm->subsAddr = 0;
vm->routineCounter = 0;
vm->programCounter = 0;
//NOTE: stack, scope and memory are not altered during resets
}

View File

@@ -2,17 +2,15 @@
#include "toy_common.h"
#include "toy_bytecode.h"
#include "toy_bucket.h"
#include "toy_stack.h"
#include "toy_scope.h"
typedef struct Toy_VM {
//hold the raw bytecode
unsigned char* bc;
//raw instructions to be executed
unsigned char* routine;
unsigned int routineSize;
unsigned char* module;
unsigned int moduleSize;
unsigned int paramSize;
unsigned int jumpsSize;
@@ -25,7 +23,7 @@ typedef struct Toy_VM {
unsigned int dataAddr;
unsigned int subsAddr;
unsigned int routineCounter;
unsigned int programCounter;
//stack - immediate-level values only
Toy_Stack* stack;
@@ -41,8 +39,8 @@ typedef struct Toy_VM {
} Toy_VM;
TOY_API void Toy_initVM(Toy_VM* vm);
TOY_API void Toy_bindVM(Toy_VM* vm, unsigned char* bytecode); //process the version data
TOY_API void Toy_bindVMToRoutine(Toy_VM* vm, unsigned char* routine); //process the routine only
TOY_API void Toy_bindVM(Toy_VM* vm, struct Toy_Bytecode* bc); //process the version data
TOY_API void Toy_bindVMToModule(Toy_VM* vm, unsigned char* module); //process the module only
TOY_API void Toy_runVM(Toy_VM* vm);
TOY_API void Toy_freeVM(Toy_VM* vm);