From 083ee950ddb4776be1c2cde8c12fe618c37df37f Mon Sep 17 00:00:00 2001 From: Kayne Ruse Date: Thu, 19 Sep 2024 12:45:35 +1000 Subject: [PATCH] WIP bytecode and routine, read more The tests are failing in a strange way, with the error message 'corrupted top size'. I don't know what it means, and it seems to be caused by a call to printf() within 'test_bytecode.c'. I need a break, as this is making me dizzy. --- .notes/bytecode-format.txt | 4 +-- source/toy_bytecode.c | 67 +++++++++++++++++++++++++++++++++++++ source/toy_bytecode.h | 13 +++++++ source/toy_common.c | 2 +- source/toy_routine.c | 58 ++++++++++++++++++++++++++++++++ source/toy_routine.h | 28 ++++++++++++++++ tests/cases/test_bytecode.c | 53 +++++++++++++++++++++++++++++ 7 files changed, 222 insertions(+), 3 deletions(-) create mode 100644 source/toy_bytecode.c create mode 100644 source/toy_bytecode.h create mode 100644 source/toy_routine.c create mode 100644 source/toy_routine.h create mode 100644 tests/cases/test_bytecode.c diff --git a/.notes/bytecode-format.txt b/.notes/bytecode-format.txt index c7b93e8..566c1e3 100644 --- a/.notes/bytecode-format.txt +++ b/.notes/bytecode-format.txt @@ -41,7 +41,7 @@ Additional information may be added later, or multiple 'modules' listed sequenti N .param count # the number of parameter fields expected N .data count # the number of data fields expected N .routine count # the number of routines present - .param start # absolute address of .param; omitted if not needed + .param start # absolute address of .param; omitted if not needed .code start # absolute address of .code; mandatory .datatable start # absolute address of .datatable; omitted if not needed .data start # absolute address of .data; omitted if not needed @@ -64,7 +64,7 @@ Additional information may be added later, or multiple 'modules' listed sequenti .data: # data that can't really be embedded into .code - "Hello world" + ,"Hello world" .routines: # inner routines, each of which conforms to this spec diff 
--git a/source/toy_bytecode.c b/source/toy_bytecode.c
new file mode 100644
--- /dev/null
+++ b/source/toy_bytecode.c
@@ -0,0 +1,87 @@
+#include "toy_bytecode.h"
+
+#include "toy_memory.h"
+
+#include <stdio.h>
+#include <string.h>
+
+//utils
+
+//make sure 'amount' more bytes fit after 'count', growing the buffer when they don't
+static void expand(Toy_Bytecode* bc, int amount) {
+	int required = bc->count + amount;
+
+	if (required > bc->capacity) {
+		int oldCapacity = bc->capacity;
+		int newCapacity = TOY_GROW_CAPACITY(oldCapacity);
+
+		//a single growth step is not guaranteed to cover a multi-byte request (such as the build string),
+		//so never settle for less than what was asked for - writing past the end corrupts the heap
+		if (newCapacity < required) {
+			newCapacity = required;
+		}
+
+		bc->ptr = TOY_GROW_ARRAY(unsigned char, bc->ptr, oldCapacity, newCapacity);
+		bc->capacity = newCapacity;
+	}
+}
+
+//append a single byte to the end of the buffer
+static void emitByte(Toy_Bytecode* bc, unsigned char byte) {
+	expand(bc, 1);
+	bc->ptr[bc->count++] = byte;
+}
+
+//placeholder - nothing is written for the module yet
+static void writeModule(Toy_Bytecode* bc, Toy_Ast* ast) {
+	//TODO: emit the module; until then, these casts keep the unused-parameter warnings quiet
+	(void)bc;
+	(void)ast;
+}
+
+//bytecode
+
+//write the three version bytes, followed by the build string and its null terminator
+static void writeBytecodeHeader(Toy_Bytecode* bc) {
+	emitByte(bc, TOY_VERSION_MAJOR);
+	emitByte(bc, TOY_VERSION_MINOR);
+	emitByte(bc, TOY_VERSION_PATCH);
+
+	//'len' includes the terminator, so the build string can be read back as a c-string
+	const char* build = Toy_private_version_build();
+	int len = (int)strlen(build) + 1;
+
+	expand(bc, len);
+	memcpy(bc->ptr + bc->count, build, (size_t)len);
+	bc->count += len;
+}
+
+static void writeBytecodeBody(Toy_Bytecode* bc, Toy_Ast* ast) {
+	//a 'module' is a routine that runs at the root-level of a file
+	//since routines can be recursive, this distinction is important
+	//eventually, the bytecode may support multiple modules packed into one file
+	writeModule(bc, ast);
+}
+
+//exposed functions
+
+//build a new bytecode buffer from 'ast'; the result is released with Toy_freeBytecode()
+Toy_Bytecode Toy_compileBytecode(Toy_Ast* ast) {
+	//setup
+	Toy_Bytecode bc;
+
+	bc.ptr = NULL;
+	bc.capacity = 0;
+	bc.count = 0;
+
+	//build
+	writeBytecodeHeader(&bc);
+	writeBytecodeBody(&bc, ast);
+
+	return bc;
+}
+
+//release the buffer held by 'bc'
+void Toy_freeBytecode(Toy_Bytecode bc) {
+	TOY_FREE_ARRAY(unsigned char, bc.ptr, bc.capacity);
+}
diff --git a/source/toy_bytecode.h b/source/toy_bytecode.h
new file mode 100644
--- /dev/null
+++ b/source/toy_bytecode.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "toy_common.h"
+#include "toy_ast.h"
+
+//bytecode - a growable byte buffer that receives the compiled output
+typedef struct Toy_Bytecode {
+	unsigned char* ptr; //the buffer itself
+	int capacity; //number of bytes allocated
+	int count; //number of bytes written so far
+} Toy_Bytecode;
+
+TOY_API Toy_Bytecode Toy_compileBytecode(Toy_Ast* ast);
+TOY_API void Toy_freeBytecode(Toy_Bytecode bc);
diff --git a/source/toy_common.c b/source/toy_common.c
index 3a57f99..f96c347 100644
--- a/source/toy_common.c
+++ b/source/toy_common.c
@@ -1,7 +1,7 @@
 #include "toy_common.h"
 
 //defined separately, as compilation can take several seconds, invalidating the comparisons of the given macros
-static const char* build = __DATE__ " " __TIME__ ";incomplete dev branch";
+static const char* build = __DATE__ " " __TIME__ ", Toy branch 'dev'";
 
 const char* Toy_private_version_build() {
 	return build;
diff --git a/source/toy_routine.c b/source/toy_routine.c
new file mode 100644
--- /dev/null
+++ b/source/toy_routine.c
@@ -0,0 +1,65 @@
+#include "toy_routine.h"
+
+#include "toy_memory.h"
+
+#include <stdio.h>
+#include <string.h>
+
+//utils
+
+//make room for one more byte in the byte array behind 'handle', growing it when full
+static void expand(void** handle, int* capacity, int* count) {
+	if ((*count) + 1 > (*capacity)) {
+		int oldCapacity = (*capacity);
+
+		(*capacity) = TOY_GROW_CAPACITY(oldCapacity);
+		(*handle) = TOY_GROW_ARRAY(unsigned char, (*handle), oldCapacity, (*capacity));
+	}
+}
+
+//append one byte to the byte array behind 'handle'
+static void emitByte(void** handle, int* capacity, int* count, unsigned char byte) {
+	expand(handle, capacity, count);
+	((unsigned char*)(*handle))[(*count)++] = byte;
+}
+
+//routine
+//TODO
+
+//exposed functions
+
+//returns an empty routine for now; 'ast' is not read yet
+Toy_Routine Toy_compileRoutine(Toy_Ast* ast) {
+	//setup
+	Toy_Routine rt;
+
+	rt.param = NULL;
+	rt.paramCapacity = 0;
+	rt.paramCount = 0;
+
+	rt.code = NULL;
+	rt.codeCapacity = 0;
+	rt.codeCount = 0;
+
+	rt.data = NULL;
+	rt.dataCapacity = 0;
+	rt.dataCount = 0;
+
+	rt.jump = NULL;
+	rt.jumpCapacity = 0;
+	rt.jumpCount = 0;
+
+	//build
+	//TODO
+	(void)ast;
+
+	return rt;
+}
+
+//release every array held by 'rt'; the name matches the declaration in toy_routine.h
+void Toy_freeRoutine(Toy_Routine rt) {
+	TOY_FREE_ARRAY(unsigned char, rt.param, rt.paramCapacity);
+	TOY_FREE_ARRAY(unsigned char, rt.code, rt.codeCapacity);
+	TOY_FREE_ARRAY(unsigned char, rt.data, rt.dataCapacity);
+	TOY_FREE_ARRAY(int, rt.jump, rt.jumpCapacity);
+}
diff --git a/source/toy_routine.h b/source/toy_routine.h
new file mode 100644
index 0000000..b1e909b
--- /dev/null
+++ b/source/toy_routine.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include "toy_common.h"
+#include "toy_ast.h"
+
+//routine - holds the individual parts of a compiled routine
+typedef struct Toy_Routine {
+	unsigned char* param; //c-string params in sequence
+	int paramCapacity;
+	int paramCount;
+
+	unsigned char* code; //the instruction set
+	int codeCapacity;
+	int codeCount;
+
+	unsigned char* data; //{type,val} tuples of data
+	int dataCapacity;
+	int dataCount;
+
+	int* jump; //each 'jump' is the starting address of an element within 'data'
+	int jumpCapacity;
+	int jumpCount;
+
+	//TODO: duplicate the data and jumps for subroutines
+} Toy_Routine;
+
+TOY_API Toy_Routine Toy_compileRoutine(Toy_Ast* ast);
+TOY_API void Toy_freeRoutine(Toy_Routine routine);
diff --git a/tests/cases/test_bytecode.c b/tests/cases/test_bytecode.c
new file mode 100644
--- /dev/null
+++ b/tests/cases/test_bytecode.c
@@ -0,0 +1,55 @@
+#include "toy_bytecode.h"
+#include "toy_console_colors.h"
+
+#include <stdio.h>
+#include <string.h>
+
+//tests
+int test_bytecode_header(Toy_Bucket* bucket) {
+	//simple test to ensure the header looks right
+	{
+		//setup
+		Toy_Ast* ast = NULL;
+		Toy_private_emitAstPass(&bucket, &ast);
+
+		//run
+		Toy_Bytecode bc = Toy_compileBytecode(ast);
+
+		//check
+		if (bc.ptr[0] != TOY_VERSION_MAJOR ||
+			bc.ptr[1] != TOY_VERSION_MINOR ||
+			bc.ptr[2] != TOY_VERSION_PATCH ||
+			strcmp((char*)(bc.ptr + 3), TOY_VERSION_BUILD) != 0)
+		{
+			fprintf(stderr, TOY_CC_ERROR "ERROR: failed to write the bytecode header correctly:\n" TOY_CC_RESET);
+			fprintf(stderr, TOY_CC_ERROR "\t%d.%d.%d.%s\n" TOY_CC_RESET, bc.ptr[0], bc.ptr[1], bc.ptr[2], (char*)(bc.ptr + 3));
+			fprintf(stderr, TOY_CC_ERROR "\t%d.%d.%d.%s\n" TOY_CC_RESET, TOY_VERSION_MAJOR, TOY_VERSION_MINOR, TOY_VERSION_PATCH, TOY_VERSION_BUILD);
+			//don't leak the buffer on the failure path
+			Toy_freeBytecode(bc);
+			return -1;
+		}
+
+		//cleanup
+		Toy_freeBytecode(bc);
+	}
+
+	return 0;
+}
+
+int main() {
+	//run each test set, returning the total errors given
+	int total = 0, res = 0;
+
+	{
+		Toy_Bucket* bucket = NULL;
+		TOY_BUCKET_INIT(Toy_Ast, bucket, 32);
+		res = test_bytecode_header(bucket);
+		TOY_BUCKET_FREE(bucket);
+		if (res == 0) {
+			printf(TOY_CC_NOTICE "All good\n" TOY_CC_RESET);
+		}
+		total += res;
+	}
+
+	return total;
+}