String literals are being parsed, compiled and printed, read more

Strings, due to their potentially large size, are stored outside of a
routine's code section, in the data section. To access the correct
string, you must read the jump index, then the real address from the
jump table - an extra layer of indirection will result in more flexible
data down the road, I hope.

Other changes include:

* Added string concat operator ..
* Added TOY_STRING_MAX_LENGTH
* Strings can't be created or concatenated longer than the max length
* The parser will display a warning if the bucket is too small for a
  string at max length, but it will continue
* Added TOY_BUCKET_IDEAL to correspond with max string length
* The bucket now allocates an address that is 4-byte aligned
* Fixed missing entries in the parser rule table
* Corrected some failing TOY_BITNESS tests
This commit is contained in:
2024-10-07 23:05:36 +11:00
parent 14653a303f
commit 4bcf8e84a9
23 changed files with 572 additions and 195 deletions

View File

@@ -3,6 +3,7 @@
#include "toy_opcodes.h"
#include "toy_value.h"
#include "toy_string.h"
#include <stdio.h>
#include <stdlib.h>
@@ -17,7 +18,7 @@ static void expand(void** handle, unsigned int* capacity, unsigned int* count, u
(*handle) = realloc((*handle), (*capacity));
if ((*handle) == NULL) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Failed to allocate a 'Toy_Routine' of %d capacity\n" TOY_CC_RESET, (int)(*capacity));
fprintf(stderr, TOY_CC_ERROR "ERROR: Failed to allocate %d space for a part of 'Toy_Routine'\n" TOY_CC_RESET, (int)(*capacity));
exit(1);
}
}
@@ -45,48 +46,91 @@ static void emitFloat(void** handle, unsigned int* capacity, unsigned int* count
}
//write instructions based on the AST types
#define EMIT_BYTE(rt, byte) \
emitByte((void**)(&((*rt)->code)), &((*rt)->codeCapacity), &((*rt)->codeCount), byte);
#define EMIT_INT(rt, code, byte) \
emitInt((void**)(&((*rt)->code)), &((*rt)->codeCapacity), &((*rt)->codeCount), byte);
#define EMIT_FLOAT(rt, code, byte) \
emitFloat((void**)(&((*rt)->code)), &((*rt)->codeCapacity), &((*rt)->codeCount), byte);
//Emit helpers: append a value to one section ("part") of the routine being built.
// * 'rt' is a pointer-to-pointer to the routine; the expansion dereferences it as (*rt)
// * 'part' names the section member; token pasting derives the matching 'part##Capacity'
//   and 'part##Count' members from it (e.g. 'code' -> code, codeCapacity, codeCount)
// * 'byte' / 'bytes' is the value forwarded to emitByte() / emitInt() / emitFloat()
//NOTE: each expansion already ends with a semicolon, so 'EMIT_BYTE(...);' expands to the
//call followed by an empty statement.
#define EMIT_BYTE(rt, part, byte) \
emitByte((void**)(&((*rt)->part)), &((*rt)->part##Capacity), &((*rt)->part##Count), byte);
#define EMIT_INT(rt, part, bytes) \
emitInt((void**)(&((*rt)->part)), &((*rt)->part##Capacity), &((*rt)->part##Count), bytes);
#define EMIT_FLOAT(rt, part, bytes) \
emitFloat((void**)(&((*rt)->part)), &((*rt)->part##Capacity), &((*rt)->part##Count), bytes);
//Registers 'startAddr' as a new jump table entry, and writes that entry's index into the code section.
//The index is the jump table's count as it stands before the new entry is appended.
static void emitToJumpTable(Toy_Routine** rt, unsigned int startAddr) {
	//capture the index first, as appending to the jump table changes jumpsCount
	const unsigned int jumpIndex = (*rt)->jumpsCount;

	EMIT_INT(rt, code, jumpIndex); //the code section refers to the entry by its index
	EMIT_INT(rt, jumps, startAddr); //the entry itself holds the address
}
//Copies 'str' (including its null terminator) into the routine's data section, then records
//the position it was written to via emitToJumpTable().
//
//The space reserved for the string is rounded up to a multiple of 4 bytes, and the data count
//is advanced by that rounded size. The reserved span is zeroed before the copy, so the
//alignment padding is always written as zeroes rather than left uninitialized.
static void emitString(Toy_Routine** rt, Toy_String* str) {
	//4-byte alignment
	unsigned int length = str->length + 1; //+1 for the null terminator
	if (length % 4 != 0) {
		length += 4 - (length % 4); //ceil
	}

	//grab the current start address
	unsigned int startAddr = (*rt)->dataCount;

	//move the string into the data section
	//NOTE(review): the other expand() callers in this file appear to pass only the number of extra bytes needed; passing dataCount + length here looks like a harmless over-request - confirm
	expand((void**)(&((*rt)->data)), &((*rt)->dataCapacity), &((*rt)->dataCount), (*rt)->dataCount + length);

	//memory obtained from realloc() is indeterminate, so clear the whole reserved span first;
	//otherwise the padding bytes (or everything, for an unhandled string type) would end up as garbage in the output
	memset((*rt)->data + (*rt)->dataCount, 0, length);

	if (str->type == TOY_STRING_NODE) {
		//node strings are copied out of a temporary raw buffer, which is freed here once copied
		char* buffer = Toy_getStringRawBuffer(str);
		memcpy((*rt)->data + (*rt)->dataCount, buffer, str->length + 1);
		free(buffer);
	}
	else if (str->type == TOY_STRING_LEAF) {
		memcpy((*rt)->data + (*rt)->dataCount, str->as.leaf.data, str->length + 1);
	}
	else if (str->type == TOY_STRING_NAME) {
		memcpy((*rt)->data + (*rt)->dataCount, str->as.name.data, str->length + 1);
	}

	(*rt)->dataCount += length;

	//mark the jump position
	emitToJumpTable(rt, startAddr);
}
static void writeRoutineCode(Toy_Routine** rt, Toy_Ast* ast); //forward declare for recursion
static void writeInstructionValue(Toy_Routine** rt, Toy_AstValue ast) {
//TODO: store more complex values in the data code
EMIT_BYTE(rt, TOY_OPCODE_READ);
EMIT_BYTE(rt, ast.value.type);
EMIT_BYTE(rt, code, TOY_OPCODE_READ);
EMIT_BYTE(rt, code, ast.value.type);
//emit the raw value based on the type
if (TOY_VALUE_IS_NULL(ast.value)) {
//NOTHING - null's type data is enough
//4-byte alignment
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, code, 0);
EMIT_BYTE(rt, code, 0);
}
else if (TOY_VALUE_IS_BOOLEAN(ast.value)) {
EMIT_BYTE(rt, TOY_VALUE_AS_BOOLEAN(ast.value));
EMIT_BYTE(rt, code, TOY_VALUE_AS_BOOLEAN(ast.value));
//4-byte alignment
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, code, 0);
}
else if (TOY_VALUE_IS_INTEGER(ast.value)) {
//4-byte alignment
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, code, 0);
EMIT_BYTE(rt, code, 0);
EMIT_INT(rt, code, TOY_VALUE_AS_INTEGER(ast.value));
}
else if (TOY_VALUE_IS_FLOAT(ast.value)) {
//4-byte alignment
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, code, 0);
EMIT_BYTE(rt, code, 0);
EMIT_FLOAT(rt, code, TOY_VALUE_AS_FLOAT(ast.value));
}
else if (TOY_VALUE_IS_STRING(ast.value)) {
//4-byte alignment
EMIT_BYTE(rt, code, 0);
EMIT_BYTE(rt, code, 0);
emitString(rt, TOY_VALUE_AS_STRING(ast.value));
}
else {
fprintf(stderr, TOY_CC_ERROR "ERROR: Invalid AST type found: Unknown value type\n" TOY_CC_RESET);
exit(-1);
@@ -98,12 +142,12 @@ static void writeInstructionUnary(Toy_Routine** rt, Toy_AstUnary ast) {
writeRoutineCode(rt, ast.child);
if (ast.flag == TOY_AST_FLAG_NEGATE) {
EMIT_BYTE(rt, TOY_OPCODE_NEGATE);
EMIT_BYTE(rt, code, TOY_OPCODE_NEGATE);
//4-byte alignment
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, code, 0);
EMIT_BYTE(rt, code, 0);
EMIT_BYTE(rt, code, 0);
}
else {
fprintf(stderr, TOY_CC_ERROR "ERROR: Invalid AST unary flag found\n" TOY_CC_RESET);
@@ -117,80 +161,83 @@ static void writeInstructionBinary(Toy_Routine** rt, Toy_AstBinary ast) {
writeRoutineCode(rt, ast.right);
if (ast.flag == TOY_AST_FLAG_ADD) {
EMIT_BYTE(rt, TOY_OPCODE_ADD);
EMIT_BYTE(rt, code,TOY_OPCODE_ADD);
}
else if (ast.flag == TOY_AST_FLAG_SUBTRACT) {
EMIT_BYTE(rt, TOY_OPCODE_SUBTRACT);
EMIT_BYTE(rt, code,TOY_OPCODE_SUBTRACT);
}
else if (ast.flag == TOY_AST_FLAG_MULTIPLY) {
EMIT_BYTE(rt, TOY_OPCODE_MULTIPLY);
EMIT_BYTE(rt, code,TOY_OPCODE_MULTIPLY);
}
else if (ast.flag == TOY_AST_FLAG_DIVIDE) {
EMIT_BYTE(rt, TOY_OPCODE_DIVIDE);
EMIT_BYTE(rt, code,TOY_OPCODE_DIVIDE);
}
else if (ast.flag == TOY_AST_FLAG_MODULO) {
EMIT_BYTE(rt, TOY_OPCODE_MODULO);
EMIT_BYTE(rt, code,TOY_OPCODE_MODULO);
}
// else if (ast.flag == TOY_AST_FLAG_ASSIGN) {
// EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
// EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
// //TODO: emit the env symbol to store TOP(S) within
// }
// else if (ast.flag == TOY_AST_FLAG_ADD_ASSIGN) {
// EMIT_BYTE(rt, TOY_OPCODE_ADD);
// EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
// EMIT_BYTE(rt, code,TOY_OPCODE_ADD);
// EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
// //TODO: emit the env symbol to store TOP(S) within
// }
// else if (ast.flag == TOY_AST_FLAG_SUBTRACT_ASSIGN) {
// EMIT_BYTE(rt, TOY_OPCODE_SUBTRACT);
// EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
// EMIT_BYTE(rt, code,TOY_OPCODE_SUBTRACT);
// EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
// //TODO: emit the env symbol to store TOP(S) within
// }
// else if (ast.flag == TOY_AST_FLAG_MULTIPLY_ASSIGN) {
// EMIT_BYTE(rt, TOY_OPCODE_MULTIPLY);
// EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
// EMIT_BYTE(rt, code,TOY_OPCODE_MULTIPLY);
// EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
// //TODO: emit the env symbol to store TOP(S) within
// }
// else if (ast.flag == TOY_AST_FLAG_DIVIDE_ASSIGN) {
// EMIT_BYTE(rt, TOY_OPCODE_DIVIDE);
// EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
// EMIT_BYTE(rt, code,TOY_OPCODE_DIVIDE);
// EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
// //TODO: emit the env symbol to store TOP(S) within
// }
// else if (ast.flag == TOY_AST_FLAG_MODULO_ASSIGN) {
// EMIT_BYTE(rt, TOY_OPCODE_MODULO);
// EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
// EMIT_BYTE(rt, code,TOY_OPCODE_MODULO);
// EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
// //TODO: emit the env symbol to store TOP(S) within
// }
else if (ast.flag == TOY_AST_FLAG_COMPARE_EQUAL) {
EMIT_BYTE(rt, TOY_OPCODE_COMPARE_EQUAL);
EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_EQUAL);
}
else if (ast.flag == TOY_AST_FLAG_COMPARE_NOT) {
EMIT_BYTE(rt, TOY_OPCODE_COMPARE_EQUAL);
EMIT_BYTE(rt, TOY_OPCODE_NEGATE); //squeezed into one word
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_EQUAL);
EMIT_BYTE(rt, code,TOY_OPCODE_NEGATE); //squeezed into one word
EMIT_BYTE(rt, code,0);
EMIT_BYTE(rt, code,0);
return;
}
else if (ast.flag == TOY_AST_FLAG_COMPARE_LESS) {
EMIT_BYTE(rt, TOY_OPCODE_COMPARE_LESS);
EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_LESS);
}
else if (ast.flag == TOY_AST_FLAG_COMPARE_LESS_EQUAL) {
EMIT_BYTE(rt, TOY_OPCODE_COMPARE_LESS_EQUAL);
EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_LESS_EQUAL);
}
else if (ast.flag == TOY_AST_FLAG_COMPARE_GREATER) {
EMIT_BYTE(rt, TOY_OPCODE_COMPARE_GREATER);
EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_GREATER);
}
else if (ast.flag == TOY_AST_FLAG_COMPARE_GREATER_EQUAL) {
EMIT_BYTE(rt, TOY_OPCODE_COMPARE_GREATER_EQUAL);
EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_GREATER_EQUAL);
}
else if (ast.flag == TOY_AST_FLAG_AND) {
EMIT_BYTE(rt, TOY_OPCODE_AND);
EMIT_BYTE(rt, code,TOY_OPCODE_AND);
}
else if (ast.flag == TOY_AST_FLAG_OR) {
EMIT_BYTE(rt, TOY_OPCODE_OR);
EMIT_BYTE(rt, code,TOY_OPCODE_OR);
}
else if (ast.flag == TOY_AST_FLAG_CONCAT) {
EMIT_BYTE(rt, code, TOY_OPCODE_CONCAT);
}
else {
fprintf(stderr, TOY_CC_ERROR "ERROR: Invalid AST binary flag found\n" TOY_CC_RESET);
@@ -198,9 +245,9 @@ static void writeInstructionBinary(Toy_Routine** rt, Toy_AstBinary ast) {
}
//4-byte alignment (covers most cases)
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, code,0);
EMIT_BYTE(rt, code,0);
EMIT_BYTE(rt, code,0);
}
static void writeInstructionPrint(Toy_Routine** rt, Toy_AstPrint ast) {
@@ -208,12 +255,12 @@ static void writeInstructionPrint(Toy_Routine** rt, Toy_AstPrint ast) {
writeRoutineCode(rt, ast.child);
//output the print opcode
EMIT_BYTE(rt, TOY_OPCODE_PRINT);
EMIT_BYTE(rt, code,TOY_OPCODE_PRINT);
//4-byte alignment
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, 0);
EMIT_BYTE(rt, code,0);
EMIT_BYTE(rt, code,0);
EMIT_BYTE(rt, code,0);
}
//routine structure
@@ -274,39 +321,31 @@ static void writeRoutineCode(Toy_Routine** rt, Toy_Ast* ast) {
}
}
// static void writeRoutineJumps(Toy_Routine* rt) {
// //
// }
// static void writeRoutineData(Toy_Routine* rt) {
// //
// }
static void* writeRoutine(Toy_Routine* rt, Toy_Ast* ast) {
//build the routine's parts
//TODO: param
//code
writeRoutineCode(&rt, ast);
EMIT_BYTE(&rt, TOY_OPCODE_RETURN); //temp terminator
EMIT_BYTE(&rt, 0); //4-byte alignment
EMIT_BYTE(&rt, 0);
EMIT_BYTE(&rt, 0);
//TODO: jumps
//TODO: data
EMIT_BYTE(&rt, code, TOY_OPCODE_RETURN); //temp terminator
EMIT_BYTE(&rt, code, 0); //4-byte alignment
EMIT_BYTE(&rt, code, 0);
EMIT_BYTE(&rt, code, 0);
//write the header and combine the parts
void* buffer = NULL;
unsigned int capacity = 0, count = 0;
// int paramAddr = 0, codeAddr = 0, jumpsAddr = 0, dataAddr = 0, subsAddr = 0;
// int paramAddr = 0, codeAddr = 0, subsAddr = 0;
int codeAddr = 0;
int jumpsAddr = 0;
int dataAddr = 0;
emitInt(&buffer, &capacity, &count, 0); //total size (overwritten later)
emitInt(&buffer, &capacity, &count, rt->paramCount); //param count
emitInt(&buffer, &capacity, &count, rt->jumpsCount); //jumps count
emitInt(&buffer, &capacity, &count, rt->dataCount); //data count
emitInt(&buffer, &capacity, &count, rt->subsCount); //routine count
emitInt(&buffer, &capacity, &count, rt->paramCount); //param size
emitInt(&buffer, &capacity, &count, rt->jumpsCount); //jumps size
emitInt(&buffer, &capacity, &count, rt->dataCount); //data size
emitInt(&buffer, &capacity, &count, rt->subsCount); //routine size
//generate blank spaces, cache their positions in the []Addr variables
//generate blank spaces, cache their positions in the *Addr variables (for storing the start positions)
if (rt->paramCount > 0) {
// paramAddr = count;
emitInt((void**)&buffer, &capacity, &count, 0); //params
@@ -316,11 +355,11 @@ static void* writeRoutine(Toy_Routine* rt, Toy_Ast* ast) {
emitInt((void**)&buffer, &capacity, &count, 0); //code
}
if (rt->jumpsCount > 0) {
// jumpsAddr = count;
jumpsAddr = count;
emitInt((void**)&buffer, &capacity, &count, 0); //jumps
}
if (rt->dataCount > 0) {
// dataAddr = count;
dataAddr = count;
emitInt((void**)&buffer, &capacity, &count, 0); //data
}
if (rt->subsCount > 0) {
@@ -337,6 +376,22 @@ static void* writeRoutine(Toy_Routine* rt, Toy_Ast* ast) {
count += rt->codeCount;
}
if (rt->jumpsCount > 0) {
expand(&buffer, &capacity, &count, rt->jumpsCount);
memcpy((buffer + count), rt->jumps, rt->jumpsCount);
*((int*)(buffer + jumpsAddr)) = count;
count += rt->jumpsCount;
}
if (rt->dataCount > 0) {
expand(&buffer, &capacity, &count, rt->dataCount);
memcpy((buffer + count), rt->data, rt->dataCount);
*((int*)(buffer + dataAddr)) = count;
count += rt->dataCount;
}
//finally, record the total size within the header, and return the result
*((int*)buffer) = count;