diff --git a/.notes/SECD-concept.txt b/.notes/SECD-concept.txt index cbc4990..616a35a 100644 --- a/.notes/SECD-concept.txt +++ b/.notes/SECD-concept.txt @@ -28,24 +28,17 @@ Things to consider later: === -//general instructions +//variable instructions READ read one value from C onto S LOAD read one value from .data onto S -ASSERT - if S(-1) is falsy, print S(0) and exit -PRINT - pop S(0), and print the output -SET - read one word from C, saves the key E[SYMBOL(word)] to the value S(0), popping S(0) -GET - read one word from C, finds the value of E[SYMBOL(word)], leaves the value on S DECLARE read two words from C, create a new entry in E with the key E[SYMBOL(word1)], the type defined by word2, the value 'null' DEFINE - read two words from C, create a new entry in E with the key E[SYMBOL(word1)], the type defined by word2, the value popped from S(0) - + read one word from C, saves the pre-existing key E[SYMBOL(word)] to the value S(0), popping S(0) +ACCESS + read one word from C, finds the pre-existing value of E[SYMBOL(word)], leaves the value on S //arithmetic instructions ADD @@ -80,7 +73,7 @@ OR pops S(-1) and S(0), replacing it with TRUE or FALSE, depending on truthiness TRUTHY pops S(0), replacing it with TRUE or FALSE, depending on truthiness -INVERT +NEGATE pops S(0), replacing it with TRUE or FALSE, depending on truthiness @@ -92,10 +85,13 @@ JUMP_IF_FALSE FN_CALL *read a list of arguments specified in C into 'A', store (S, E, C, D) as D, push S, move the stack pointer to the specified routine, push a new E based on the contents of 'A' FN_RETURN -This *read a list of return values specified in C into 'R', pop S, restore (S, E, C, D) from D(0) popping it, store the contents of 'R' in E or S based on the next few parts of C //bespoke utility instructions +ASSERT + if S(-1) is falsy, print S(0) and exit +PRINT + pop S(0), and print the output IMPORT //invoke an external library into the current scope CONCAT diff --git a/.notes/bytecode-format.txt b/.notes/bytecode-format.txt index 566c1e3..914b222 100644 --- a/.notes/bytecode-format.txt +++ b/.notes/bytecode-format.txt @@ -37,7 +37,7 @@ Additional information may be added later, or multiple 'modules' listed sequenti # where 'module' can be omitted if it's local to this module ('identifier' within the symbols is calculated at the module level, it's always unique) .header: - total size # size of this routine, including all data and subroutines + N total size # size of this routine, including all data and subroutines N .param count # the number of parameter fields expected N .data count # the number of data fields expected N .routine count # the number of routines present diff --git a/source/toy_ast.h b/source/toy_ast.h index 7d5ff6b..6a7fd4e 100644 --- a/source/toy_ast.h +++ b/source/toy_ast.h @@ -29,12 +29,12 @@ typedef enum Toy_AstFlag { TOY_AST_FLAG_MULTIPLY, TOY_AST_FLAG_DIVIDE, TOY_AST_FLAG_MODULO, + TOY_AST_FLAG_ASSIGN, //TODO: implement the declare statement TOY_AST_FLAG_ADD_ASSIGN, TOY_AST_FLAG_SUBTRACT_ASSIGN, TOY_AST_FLAG_MULTIPLY_ASSIGN, TOY_AST_FLAG_DIVIDE_ASSIGN, TOY_AST_FLAG_MODULO_ASSIGN, - TOY_AST_FLAG_ASSIGN, TOY_AST_FLAG_COMPARE_EQUAL, TOY_AST_FLAG_COMPARE_NOT, TOY_AST_FLAG_COMPARE_LESS, diff --git a/source/toy_bytecode.c b/source/toy_bytecode.c index 8703d97..c971fc0 100644 --- a/source/toy_bytecode.c +++ b/source/toy_bytecode.c @@ -1,4 +1,5 @@ #include "toy_bytecode.h" +#include "toy_console_colors.h" #include "toy_memory.h" #include "toy_routine.h" @@ -21,10 +22,6 @@ static void emitByte(Toy_Bytecode* bc, unsigned char byte) { bc->ptr[bc->count++] = byte; } -static void writeModule(Toy_Bytecode* bc, Toy_Ast* ast) { - //TODO: routines -} - //bytecode static void writeBytecodeHeader(Toy_Bytecode* bc) { emitByte(bc, TOY_VERSION_MAJOR); @@ -36,7 +33,7 @@ static void writeBytecodeHeader(Toy_Bytecode* bc) { int len = (int)strlen(build) + 1; expand(bc, len); - sprintf((char*)(bc->ptr + bc->count), "%.*s", len, build); + memcpy(bc->ptr + bc->count, build, len); bc->count += len; } @@ -44,7 +41,13 @@ static void writeBytecodeBody(Toy_Bytecode* bc, Toy_Ast* ast) { //a 'module' is a routine that runs at the root-level of a file //since routines can be recursive, this distinction is important //eventually, the bytecode may support multiple modules packed into one file - writeModule(bc, ast); + void* module = Toy_compileRoutine(ast); + + int len = ((int*)module)[0]; + + expand(bc, len); + memcpy(bc->ptr + bc->count, module, len); + bc->count += len; } //exposed functions diff --git a/source/toy_opcodes.h b/source/toy_opcodes.h index 07df503..23e5767 100644 --- a/source/toy_opcodes.h +++ b/source/toy_opcodes.h @@ -1,10 +1,40 @@ #pragma once typedef enum Toy_OpcodeType { - // + //variable instructions + TOY_OPCODE_READ, + TOY_OPCODE_LOAD, + TOY_OPCODE_LOAD_LONG, //corner case + TOY_OPCODE_DECLARE, + TOY_OPCODE_ASSIGN, + TOY_OPCODE_ACCESS, + //arithmetic instructions + TOY_OPCODE_ADD, + TOY_OPCODE_SUBTRACT, + TOY_OPCODE_MULTIPLY, + TOY_OPCODE_DIVIDE, + TOY_OPCODE_MODULO, + + //comparison instructions + TOY_OPCODE_COMPARE_EQUAL, + // TOY_OPCODE_COMPARE_NOT, + TOY_OPCODE_COMPARE_LESS, + TOY_OPCODE_COMPARE_LESS_EQUAL, + TOY_OPCODE_COMPARE_GREATER, + TOY_OPCODE_COMPARE_GREATER_EQUAL, + + //logical instructions + TOY_OPCODE_AND, + TOY_OPCODE_OR, + TOY_OPCODE_TRUTHY, + TOY_OPCODE_NEGATE, + + //control instructions + TOY_OPCODE_RETURN, + + //meta instructions TOY_OPCODE_PASS, TOY_OPCODE_ERROR, TOY_OPCODE_EOF, } Toy_OpcodeType; - diff --git a/source/toy_routine.c b/source/toy_routine.c index eaa51af..aedb272 100644 --- a/source/toy_routine.c +++ b/source/toy_routine.c @@ -1,8 +1,12 @@ #include "toy_routine.h" +#include "toy_console_colors.h" #include "toy_memory.h" +#include "toy_opcodes.h" +#include "toy_value.h" #include +#include #include //utils @@ -10,7 +14,7 @@ static void expand(void** handle, int* capacity, int* count, int amount) { while ((*count) + amount > (*capacity)) { int oldCapacity = (*capacity); - (*capacity) = TOY_GROW_CAPACITY(oldCapacity); + (*capacity) = TOY_GROW_CAPACITY(oldCapacity); //TODO: don't need GROW (*handle) = TOY_GROW_ARRAY(unsigned char, (*handle), oldCapacity, (*capacity)); } } @@ -20,11 +24,273 @@ static void emitByte(void** handle, int* capacity, int* count, unsigned char byt ((unsigned char*)(*handle))[(*count)++] = byte; } -//routine -//TODO +static void emitInt(void** handle, int* capacity, int* count, int bytes) { + char* ptr = (char*)&bytes; + emitByte(handle, capacity, count, *(ptr++)); + emitByte(handle, capacity, count, *(ptr++)); + emitByte(handle, capacity, count, *(ptr++)); + emitByte(handle, capacity, count, *(ptr++)); +} + +static void emitFloat(void** handle, int* capacity, int* count, float bytes) { + char* ptr = (char*)&bytes; + emitByte(handle, capacity, count, *(ptr++)); + emitByte(handle, capacity, count, *(ptr++)); + emitByte(handle, capacity, count, *(ptr++)); + emitByte(handle, capacity, count, *(ptr++)); +} + +//write instructions based on the AST types +#define EMIT_BYTE(rt, byte) \ + emitByte((void**)(&((*rt)->code)), &((*rt)->codeCapacity), &((*rt)->codeCount), byte); +#define EMIT_INT(rt, code, byte) \ + emitInt((void**)(&((*rt)->code)), &((*rt)->codeCapacity), &((*rt)->codeCount), byte); +#define EMIT_FLOAT(rt, code, byte) \ + emitFloat((void**)(&((*rt)->code)), &((*rt)->codeCapacity), &((*rt)->codeCount), byte); + +static void writeRoutineCode(Toy_Routine** rt, Toy_Ast* ast); //forward declare for recursion + +static void writeInstructionValue(Toy_Routine** rt, Toy_AstValue ast) { + //TODO: store more complex values in the data code + EMIT_BYTE(rt, TOY_OPCODE_READ); + EMIT_BYTE(rt, ast.value.type); + + //emit the raw value based on the type + if (TOY_VALUE_IS_NULL(ast.value)) { + //NOTHING - null's type data is enough + } + else if (TOY_VALUE_IS_BOOLEAN(ast.value)) { + EMIT_BYTE(rt, TOY_VALUE_AS_BOOLEAN(ast.value)); + } + else if (TOY_VALUE_IS_INTEGER(ast.value)) { + EMIT_INT(rt, code, TOY_VALUE_AS_INTEGER(ast.value)); + } + else if (TOY_VALUE_IS_FLOAT(ast.value)) { + EMIT_FLOAT(rt, code, TOY_VALUE_AS_FLOAT(ast.value)); + } + else { + fprintf(stderr, TOY_CC_ERROR "Invalid AST type found: Unknown value type\n" TOY_CC_RESET); + exit(-1); + } +} + +static void writeInstructionUnary(Toy_Routine** rt, Toy_AstUnary ast) { + //working with a stack means the child gets placed first + writeRoutineCode(rt, ast.child); + + if (ast.flag == TOY_AST_FLAG_NEGATE) { + EMIT_BYTE(rt, TOY_OPCODE_NEGATE); + } + else { + fprintf(stderr, TOY_CC_ERROR "Invalid AST unary flag found\n" TOY_CC_RESET); + exit(-1); + } +} + +static void writeInstructionBinary(Toy_Routine** rt, Toy_AstBinary ast) { + //left, then right, then the binary's operation + writeRoutineCode(rt, ast.left); + writeRoutineCode(rt, ast.right); + + if (ast.flag == TOY_AST_FLAG_ADD) { + EMIT_BYTE(rt, TOY_OPCODE_ADD); + } + if (ast.flag == TOY_AST_FLAG_SUBTRACT) { + EMIT_BYTE(rt, TOY_OPCODE_SUBTRACT); + } + if (ast.flag == TOY_AST_FLAG_MULTIPLY) { + EMIT_BYTE(rt, TOY_OPCODE_MULTIPLY); + } + if (ast.flag == TOY_AST_FLAG_DIVIDE) { + EMIT_BYTE(rt, TOY_OPCODE_DIVIDE); + } + if (ast.flag == TOY_AST_FLAG_MODULO) { + EMIT_BYTE(rt, TOY_OPCODE_MODULO); + } + + // if (ast.flag == TOY_AST_FLAG_ASSIGN) { + // EMIT_BYTE(rt, TOY_OPCODE_ASSIGN); + // //TODO: emit the env symbol to store TOP(S) within + // } + // if (ast.flag == TOY_AST_FLAG_ADD_ASSIGN) { + // EMIT_BYTE(rt, TOY_OPCODE_ADD); + // EMIT_BYTE(rt, TOY_OPCODE_ASSIGN); + // //TODO: emit the env symbol to store TOP(S) within + // } + // if (ast.flag == TOY_AST_FLAG_SUBTRACT_ASSIGN) { + // EMIT_BYTE(rt, TOY_OPCODE_SUBTRACT); + // EMIT_BYTE(rt, TOY_OPCODE_ASSIGN); + // //TODO: emit the env symbol to store TOP(S) within + // } + // if (ast.flag == TOY_AST_FLAG_MULTIPLY_ASSIGN) { + // EMIT_BYTE(rt, TOY_OPCODE_MULTIPLY); + // EMIT_BYTE(rt, TOY_OPCODE_ASSIGN); + // //TODO: emit the env symbol to store TOP(S) within + // } + // if (ast.flag == TOY_AST_FLAG_DIVIDE_ASSIGN) { + // EMIT_BYTE(rt, TOY_OPCODE_DIVIDE); + // EMIT_BYTE(rt, TOY_OPCODE_ASSIGN); + // //TODO: emit the env symbol to store TOP(S) within + // } + // if (ast.flag == TOY_AST_FLAG_MODULO_ASSIGN) { + // EMIT_BYTE(rt, TOY_OPCODE_MODULO); + // EMIT_BYTE(rt, TOY_OPCODE_ASSIGN); + // //TODO: emit the env symbol to store TOP(S) within + // } + + if (ast.flag == TOY_AST_FLAG_COMPARE_EQUAL) { + EMIT_BYTE(rt, TOY_OPCODE_COMPARE_EQUAL); + } + if (ast.flag == TOY_AST_FLAG_COMPARE_NOT) { + EMIT_BYTE(rt, TOY_OPCODE_COMPARE_EQUAL); + EMIT_BYTE(rt, TOY_OPCODE_NEGATE); + } + if (ast.flag == TOY_AST_FLAG_COMPARE_LESS) { + EMIT_BYTE(rt, TOY_OPCODE_COMPARE_LESS); + } + if (ast.flag == TOY_AST_FLAG_COMPARE_LESS_EQUAL) { + EMIT_BYTE(rt, TOY_OPCODE_COMPARE_LESS_EQUAL); + } + if (ast.flag == TOY_AST_FLAG_COMPARE_GREATER) { + EMIT_BYTE(rt, TOY_OPCODE_COMPARE_GREATER); + } + if (ast.flag == TOY_AST_FLAG_COMPARE_GREATER_EQUAL) { + EMIT_BYTE(rt, TOY_OPCODE_COMPARE_GREATER_EQUAL); + } + + if (ast.flag == TOY_AST_FLAG_AND) { + EMIT_BYTE(rt, TOY_OPCODE_AND); + } + if (ast.flag == TOY_AST_FLAG_OR) { + EMIT_BYTE(rt, TOY_OPCODE_OR); + } + else { + fprintf(stderr, TOY_CC_ERROR "Invalid AST binary flag found\n" TOY_CC_RESET); + exit(-1); + } +} + +//routine structure +// static void writeRoutineParam(Toy_Routine* rt) { +// // +// } + +static void writeRoutineCode(Toy_Routine** rt, Toy_Ast* ast) { + if (ast == NULL) { + return; + } + + //determine how to write each instruction based on the Ast + switch(ast->type) { + case TOY_AST_BLOCK: + writeRoutineCode(rt, ast->block.child); + writeRoutineCode(rt, ast->block.next); + break; + + case TOY_AST_VALUE: + writeInstructionValue(rt, ast->value); + break; + + case TOY_AST_UNARY: + writeInstructionUnary(rt, ast->unary); + break; + + case TOY_AST_BINARY: + writeInstructionBinary(rt, ast->binary); + break; + + //other disallowed instructions + case TOY_AST_GROUP: + fprintf(stderr, TOY_CC_ERROR "Invalid AST type found: Group shouldn't be used\n" TOY_CC_RESET); + exit(-1); + break; + + case TOY_AST_PASS: + //NOTE: this should be disallowed, but for now it's required for testing + // fprintf(stderr, TOY_CC_ERROR "Invalid AST type found: Unknown pass\n" TOY_CC_RESET); + // exit(-1); + break; + + //meta instructions are disallowed + case TOY_AST_ERROR: + fprintf(stderr, TOY_CC_ERROR "Invalid AST type found: Unknown error\n" TOY_CC_RESET); + exit(-1); + break; + + case TOY_AST_END: + fprintf(stderr, TOY_CC_ERROR "Invalid AST type found: Unknown end\n" TOY_CC_RESET); + exit(-1); + break; + } +} + +// static void writeRoutineJumps(Toy_Routine* rt) { +// // +// } + +// static void writeRoutineData(Toy_Routine* rt) { +// // +// } + +static void* writeRoutine(Toy_Routine* rt, Toy_Ast* ast) { + //build the routine's parts + //param + //code + writeRoutineCode(&rt, ast); + EMIT_BYTE(&rt, TOY_OPCODE_RETURN); //temp terminator + //jumps + //data + + //write the header and combine the parts + void* buffer = TOY_ALLOCATE(unsigned char, 16); + int capacity = 0, count = 0; + // int paramAddr = 0, codeAddr = 0, jumpsAddr = 0, dataAddr = 0, subsAddr = 0; + int codeAddr = 0; + + emitInt(&buffer, &capacity, &count, 0); //total size (overwritten later) + emitInt(&buffer, &capacity, &count, rt->paramCount); //param count + emitInt(&buffer, &capacity, &count, rt->dataCount); //data count + emitInt(&buffer, &capacity, &count, rt->subsCount); //routine count + + //generate blank spaces, cache their positions in the []Addr variables + if (rt->paramCount > 0) { + // paramAddr = count; + emitInt((void**)&buffer, &capacity, &count, 0); //params + } + if (rt->codeCount > 0) { + codeAddr = count; + emitInt((void**)&buffer, &capacity, &count, 0); //code + } + if (rt->jumpsCount > 0) { + // jumpsAddr = count; + emitInt((void**)&buffer, &capacity, &count, 0); //jumps + } + if (rt->dataCount > 0) { + // dataAddr = count; + emitInt((void**)&buffer, &capacity, &count, 0); //data + } + if (rt->subsCount > 0) { + // subsAddr = count; + emitInt((void**)&buffer, &capacity, &count, 0); //subs + } + + //append various parts to the buffer TODO: add the rest + if (rt->codeCount > 0) { + expand(&buffer, &capacity, &count, rt->codeCount); + memcpy((buffer + count), rt->code, rt->codeCount); + + ((int*)buffer)[codeAddr] = count; + count += rt->codeCount; + } + + //finally, record the total size, and return the result + ((int*)buffer)[0] = count; + + return buffer; +} //exposed functions -Toy_Routine Toy_compileRoutine(Toy_Ast* ast) { +void* Toy_compileRoutine(Toy_Ast* ast) { //setup Toy_Routine rt; @@ -36,23 +302,27 @@ Toy_Routine Toy_compileRoutine(Toy_Ast* ast) { rt.codeCapacity = 0; rt.codeCount = 0; + rt.jumps = NULL; + rt.jumpsCapacity = 0; + rt.jumpsCount = 0; + rt.data = NULL; rt.dataCapacity = 0; rt.dataCount = 0; - rt.jump = NULL; - rt.jumpCapacity = 0; - rt.jumpCount = 0; + rt.subs = NULL; + rt.subsCapacity = 0; + rt.subsCount = 0; //build - //TODO + void * buffer = writeRoutine(&rt, ast); - return rt; -} - -void freeRoutine(Toy_Routine rt) { + //cleanup the temp object TOY_FREE_ARRAY(unsigned char, rt.param, rt.paramCapacity); TOY_FREE_ARRAY(unsigned char, rt.code, rt.codeCapacity); + TOY_FREE_ARRAY(int, rt.jumps, rt.jumpsCapacity); TOY_FREE_ARRAY(unsigned char, rt.data, rt.dataCapacity); - TOY_FREE_ARRAY(int, rt.jump, rt.jumpCapacity); + TOY_FREE_ARRAY(unsigned char, rt.subs, rt.subsCapacity); + + return buffer; } \ No newline at end of file diff --git a/source/toy_routine.h b/source/toy_routine.h index b1e909b..3eb447c 100644 --- a/source/toy_routine.h +++ b/source/toy_routine.h @@ -3,7 +3,7 @@ #include "toy_common.h" #include "toy_ast.h" -//routine - holds the individual parts of a compiled routine +//routine - internal structure that holds the individual parts of a compiled routine typedef struct Toy_Routine { unsigned char* param; //c-string params in sequence int paramCapacity; @@ -13,16 +13,17 @@ typedef struct Toy_Routine { int codeCapacity; int codeCount; + int* jumps; //each 'jump' is the starting address of an element within 'data' + int jumpsCapacity; + int jumpsCount; + unsigned char* data; //{type,val} tuples of data int dataCapacity; int dataCount; - int* jump; //each 'jump' is the starting address of an element within 'data' - int jumpCapacity; - int jumpCount; - - //TODO: duplicate the data and jumps for subroutines + unsigned char* subs; //subroutines, recursively + int subsCapacity; + int subsCount; } Toy_Routine; -TOY_API Toy_Routine Toy_compileRoutine(Toy_Ast* ast); -TOY_API void Toy_freeRoutine(Toy_Routine routine); +TOY_API void* Toy_compileRoutine(Toy_Ast* ast); \ No newline at end of file diff --git a/tests/cases/test_bytecode.c b/tests/cases/test_bytecode.c index 8fbed51..861c36e 100644 --- a/tests/cases/test_bytecode.c +++ b/tests/cases/test_bytecode.c @@ -38,8 +38,6 @@ int test_bytecode_header(Toy_Bucket* bucket) { } int main() { - fprintf(stderr, TOY_CC_WARN "WARNING: Bytecode implementation incomplete\n" TOY_CC_RESET); - //run each test set, returning the total errors given int total = 0, res = 0; diff --git a/tests/cases/test_routine.c b/tests/cases/test_routine.c new file mode 100644 index 0000000..f932518 --- /dev/null +++ b/tests/cases/test_routine.c @@ -0,0 +1,47 @@ +#include "toy_routine.h" +#include "toy_console_colors.h" + +#include +#include + +//tests +int test_routine_header(Toy_Bucket* bucket) { + //simple test to ensure the header looks right + { + //setup + Toy_Ast* ast = NULL; + Toy_private_emitAstPass(&bucket, &ast); + + //run + void* buffer = Toy_compileRoutine(ast); + int len = ((int*)buffer)[0]; + + //check + //TODO + + //cleanup + TOY_FREE_ARRAY(unsigned char, buffer, len); + } + + return 0; +} + +int main() { + fprintf(stderr, TOY_CC_WARN "WARNING: Routine tests incomplete\n" TOY_CC_RESET); + + //run each test set, returning the total errors given + int total = 0, res = 0; + + { + Toy_Bucket* bucket = NULL; + TOY_BUCKET_INIT(Toy_Ast, bucket, 32); + res = test_routine_header(bucket); + TOY_BUCKET_FREE(bucket); + if (res == 0) { + printf(TOY_CC_NOTICE "All good\n" TOY_CC_RESET); + } + total += res; + } + + return total; +} \ No newline at end of file