mirror of
https://github.com/krgamestudios/Toy.git
synced 2026-04-15 14:54:07 +10:00
Compiler now reuses existing strings in the data, read more
If a string already exists in the data section, the function 'emitCStringToData()' does not write it again; instead, it returns the address of the existing match within the data section. The caller can then search the jump table for that address, and either reuse the existing jump entry or append a new one. Fixes #168
This commit is contained in:
@@ -126,7 +126,7 @@ void Toy_bindModuleBundle(Toy_ModuleBundle* bundle, unsigned char* ptr, unsigned
|
|||||||
memcpy(bundle->ptr, ptr, size);
|
memcpy(bundle->ptr, ptr, size);
|
||||||
bundle->count = size;
|
bundle->count = size;
|
||||||
|
|
||||||
//URGENT: test this
|
//TODO: test this
|
||||||
int valid = validateModuleBundleHeader(bundle);
|
int valid = validateModuleBundleHeader(bundle);
|
||||||
|
|
||||||
if (valid < 0) {
|
if (valid < 0) {
|
||||||
|
|||||||
@@ -108,44 +108,76 @@ static void emitFloat(unsigned char** handle, unsigned int* capacity, unsigned i
|
|||||||
//simply get the address (always an integer)
|
//simply get the address (always an integer)
|
||||||
#define CURRENT_ADDRESS(mb, part) ((*mb)->part##Count)
|
#define CURRENT_ADDRESS(mb, part) ((*mb)->part##Count)
|
||||||
|
|
||||||
static void emitToJumpTable(Toy_ModuleCompiler** mb, unsigned int startAddr) {
|
//Cached write to data, enabling string reuse, see #168: returns the byte offset of 'cstr' within the data, appending it (padded to a multiple of 4 bytes) only when no identical entry is already stored
|
||||||
EMIT_INT(mb, code, (*mb)->jumpsCount); //mark the jump index in the code
|
static unsigned int emitCStringToData(unsigned char** dataHandle, unsigned int* capacity, unsigned int* count, const char* cstr) {
|
||||||
EMIT_INT(mb, jumps, startAddr); //save address at the jump index
|
const unsigned int slen = (unsigned int)strlen(cstr) + 1; //+1 for null
|
||||||

|
||||||
|
//See if the string already exists in the data NOTE: assumes data only ever holds c-strings
|
||||||
|
unsigned int pos = 0;
|
||||||
|
while (pos < *count) { //'pos' always sits on the first byte of an entry
|
||||||
|
const char* entry = ((char*)(*dataHandle)) + pos;
|
||||||
|
unsigned int elen = strlen(entry) + 1; //+1 for null
|
||||||
|
|
||||||
|
//compare
|
||||||
|
if (slen == elen && strncmp(cstr, entry, slen) == 0) { //same length and same bytes, terminator included
|
||||||
|
return pos; //reuse: nothing is written, the existing offset is handed back
|
||||||
|
}
|
||||||
|
|
||||||
|
//next
|
||||||
|
pos += (elen + 3) & ~3; //round up to the next multiple of 4, matching the padding applied when an entry is appended below
|
||||||
|
}
|
||||||
|
|
||||||
|
//default, append the new entry
|
||||||
|
unsigned int addr = *count; //save the target address
|
||||||
|
expand(dataHandle, capacity, count, (slen + 3) & ~3); //4-byte aligned
|
||||||
|
memcpy((*dataHandle) + addr, cstr, slen); //NOTE(review): only 'slen' bytes are written; the padding bytes up to the 4-byte boundary keep whatever 'expand' left there - confirm they are zeroed
|
||||||
|
*count += (slen + 3) & ~3; //the count is advanced here by the padded size
|
||||||
|
|
||||||
|
return addr; //return the address of the string in the data section
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int emitString(Toy_ModuleCompiler** mb, Toy_String* str) {
|
static unsigned int emitString(Toy_ModuleCompiler** mb, Toy_String* str) { //stores the string via emitCStringToData(), then emits a jump index for it into the code; every path returns 1
|
||||||
//4-byte alignment
|
//4-byte alignment
|
||||||
unsigned int length = str->info.length + 1;
|
unsigned int length = str->info.length + 1;
|
||||||
if (length % 4 != 0) {
|
length = (length + 3) & ~3; //NOTE(review): in this version 'length' does not appear to be read again after this line - candidate for removal
|
||||||
length += 4 - (length % 4); //ceil
|
|
||||||
}
|
|
||||||
|
|
||||||
//grab the current start address
|
//the address within the data section
|
||||||
unsigned int startAddr = (*mb)->dataCount;
|
unsigned int dataAddr = 0; //stays 0 if the string type matches none of the branches below
|
||||||
|
|
||||||
//move the string into the data section
|
//move the string into the data section
|
||||||
expand((&((*mb)->data)), &((*mb)->dataCapacity), &((*mb)->dataCount), length);
|
|
||||||
|
|
||||||
if (str->info.type == TOY_STRING_NODE) {
|
if (str->info.type == TOY_STRING_NODE) {
|
||||||
char* buffer = Toy_getStringRawBuffer(str);
|
char* buffer = Toy_getStringRawBuffer(str);
|
||||||
memcpy((*mb)->data + (*mb)->dataCount, buffer, str->info.length + 1);
|
|
||||||
|
dataAddr = emitCStringToData(&(*mb)->data, &(*mb)->dataCapacity, &(*mb)->dataCount, buffer); //the callee measures the text with strlen(), not str->info.length
|
||||||
|
|
||||||
free(buffer);
|
free(buffer);
|
||||||
}
|
}
|
||||||
else if (str->info.type == TOY_STRING_LEAF) {
|
else if (str->info.type == TOY_STRING_LEAF) {
|
||||||
memcpy((*mb)->data + (*mb)->dataCount, str->leaf.data, str->info.length + 1);
|
dataAddr = emitCStringToData(&(*mb)->data, &(*mb)->dataCapacity, &(*mb)->dataCount, str->leaf.data);
|
||||||
}
|
}
|
||||||
else if (str->info.type == TOY_STRING_NAME) {
|
else if (str->info.type == TOY_STRING_NAME) {
|
||||||
memcpy((*mb)->data + (*mb)->dataCount, str->name.data, str->info.length + 1);
|
dataAddr = emitCStringToData(&(*mb)->data, &(*mb)->dataCapacity, &(*mb)->dataCount, str->name.data);
|
||||||
}
|
}
|
||||||
|
|
||||||
(*mb)->dataCount += length;
|
//mark the position within the jump index, reusing an existing entry if it exists
|
||||||
|
for (unsigned int i = 0; i < (*mb)->jumpsCount; i++) {
|
||||||
|
if ((*mb)->jumps[i] == dataAddr) { //NOTE(review): 'i' advances by 1 and a single jumps[i] element is compared, while new entries below use (*mb)->jumpsCount as their index - confirm the element width of 'jumps' and the unit of jumpsCount agree with this search
|
||||||
|
//reuse, and finish
|
||||||
|
EMIT_INT(mb, code, i); //the matching position is written to the code as the jump index
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//mark the jump position
|
EMIT_INT(mb, code, (*mb)->jumpsCount); //mark the new jump index in the code
|
||||||
emitToJumpTable(mb, startAddr);
|
EMIT_INT(mb, jumps, dataAddr); //append to the jump table
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// static unsigned int emitParameter(Toy_ModuleCompiler** mb, Toy_String* str) {
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
|
||||||
static unsigned int writeModuleCompilerCode(Toy_ModuleCompiler** mb, Toy_Ast* ast); //forward declare for recursion
|
static unsigned int writeModuleCompilerCode(Toy_ModuleCompiler** mb, Toy_Ast* ast); //forward declare for recursion
|
||||||
static unsigned int writeInstructionAssign(Toy_ModuleCompiler** mb, Toy_AstVarAssign ast, bool chainedAssignment); //forward declare for chaining of var declarations
|
static unsigned int writeInstructionAssign(Toy_ModuleCompiler** mb, Toy_AstVarAssign ast, bool chainedAssignment); //forward declare for chaining of var declarations
|
||||||
|
|
||||||
|
|||||||
@@ -1309,6 +1309,90 @@ int test_compiler_keywords(Toy_Bucket** bucketHandle) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int test_compiler_string_reuse(Toy_Bucket** bucketHandle) { //compiles a source with the same string literal twice and checks the header and code of the result; returns 0 on success, -1 on a mismatch
|
||||||
|
//test string literals
|
||||||
|
{
|
||||||
|
//setup
|
||||||
|
const char* source = "var first: string = \"Hello world\"; var second: string = \"Hello world\";";
|
||||||
|
Toy_Lexer lexer;
|
||||||
|
Toy_Parser parser;
|
||||||
|
|
||||||
|
Toy_bindLexer(&lexer, source);
|
||||||
|
Toy_bindParser(&parser, &lexer);
|
||||||
|
Toy_Ast* ast = Toy_scanParser(bucketHandle, &parser);
|
||||||
|
|
||||||
|
//run
|
||||||
|
unsigned char* buffer = Toy_compileModule(ast); //released with free() on every exit path below
|
||||||
|
|
||||||
|
//check header
|
||||||
|
int* ptr = (int*)buffer; //the header is read as eight consecutive ints
|
||||||
|
|
||||||
|
if ((ptr++)[0] != 108 || //total size
|
||||||
|
(ptr++)[0] != 12 || //jump count
|
||||||
|
(ptr++)[0] != 0 || //param count
|
||||||
|
(ptr++)[0] != 28 || //data count (presumably "Hello world" stored once, plus "first" and "second", each padded to 4 bytes - confirm)
|
||||||
|
(ptr++)[0] != 0 || //subs count
|
||||||
|
(ptr++)[0] != 32 || //code addr
|
||||||
|
(ptr++)[0] != 68 || //jumps addr
|
||||||
|
(ptr++)[0] != 80 || //data addr
|
||||||
|
false) //terminator
|
||||||
|
{
|
||||||
|
fprintf(stderr, TOY_CC_ERROR "ERROR: failed to reuse strings in module header, source: %s\n" TOY_CC_RESET, source);
|
||||||
|
|
||||||
|
//cleanup and return
|
||||||
|
free(buffer);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
//check code
|
||||||
|
if (*((unsigned char*)(buffer + 32)) != TOY_OPCODE_READ ||
|
||||||
|
*((unsigned char*)(buffer + 33)) != TOY_VALUE_STRING ||
|
||||||
|
*((unsigned char*)(buffer + 34)) != TOY_STRING_LEAF ||
|
||||||
|
*((unsigned char*)(buffer + 35)) != 0 ||
|
||||||
|
|
||||||
|
*((unsigned int*)(buffer + 36)) != 0 || //4-byte operand following the first READ
|
||||||
|
|
||||||
|
*((unsigned char*)(buffer + 40)) != TOY_OPCODE_DECLARE ||
|
||||||
|
*((unsigned char*)(buffer + 41)) != TOY_VALUE_STRING ||
|
||||||
|
*((unsigned char*)(buffer + 42)) != 5 || //presumably the length of the name "first"
|
||||||
|
*((unsigned char*)(buffer + 43)) != 0 ||
|
||||||
|
|
||||||
|
*((unsigned int*)(buffer + 44)) != 4 || //4-byte operand following the first DECLARE
|
||||||
|
|
||||||
|
*((unsigned char*)(buffer + 48)) != TOY_OPCODE_READ ||
|
||||||
|
*((unsigned char*)(buffer + 49)) != TOY_VALUE_STRING ||
|
||||||
|
*((unsigned char*)(buffer + 50)) != TOY_STRING_LEAF ||
|
||||||
|
*((unsigned char*)(buffer + 51)) != 0 ||
|
||||||
|
|
||||||
|
*((unsigned int*)(buffer + 52)) != 0 || //duplicate: the second READ must carry the same operand as the first (offset 36)
|
||||||
|
|
||||||
|
*((unsigned char*)(buffer + 56)) != TOY_OPCODE_DECLARE ||
|
||||||
|
*((unsigned char*)(buffer + 57)) != TOY_VALUE_STRING ||
|
||||||
|
*((unsigned char*)(buffer + 58)) != 6 || //presumably the length of the name "second"
|
||||||
|
*((unsigned char*)(buffer + 59)) != 0 ||
|
||||||
|
|
||||||
|
*((unsigned int*)(buffer + 60)) != 8 || //4-byte operand following the second DECLARE
|
||||||
|
|
||||||
|
*((unsigned char*)(buffer + 64)) != TOY_OPCODE_RETURN ||
|
||||||
|
*((unsigned char*)(buffer + 65)) != 0 ||
|
||||||
|
*((unsigned char*)(buffer + 66)) != 0 ||
|
||||||
|
*((unsigned char*)(buffer + 67)) != 0
|
||||||
|
)
|
||||||
|
{
|
||||||
|
fprintf(stderr, TOY_CC_ERROR "ERROR: failed to produce the expected module code, source: %s\n" TOY_CC_RESET, source);
|
||||||
|
|
||||||
|
//cleanup and return
|
||||||
|
free(buffer);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
//cleanup
|
||||||
|
free(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
//run each test set, returning the total errors given
|
//run each test set, returning the total errors given
|
||||||
int total = 0, res = 0;
|
int total = 0, res = 0;
|
||||||
@@ -1343,5 +1427,15 @@ int main(void) {
|
|||||||
total += res;
|
total += res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Toy_Bucket* bucket = Toy_allocateBucket(TOY_BUCKET_IDEAL);
|
||||||
|
res = test_compiler_string_reuse(&bucket);
|
||||||
|
Toy_freeBucket(&bucket);
|
||||||
|
if (res == 0) {
|
||||||
|
printf(TOY_CC_NOTICE "All good\n" TOY_CC_RESET);
|
||||||
|
}
|
||||||
|
total += res;
|
||||||
|
}
|
||||||
|
|
||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user