mirror of
https://github.com/krgamestudios/Toy.git
synced 2026-04-15 23:04:08 +10:00
Compiler now reuses existing strings in the data, read more
If a string already exists in the data section, 'emitCStringToData()' does not write it again; it returns the address of the existing match within the data section instead. The caller can then search the jump table for that address, and either reuse the existing jump entry or append a new one. Fixes #168
This commit is contained in:
@@ -126,7 +126,7 @@ void Toy_bindModuleBundle(Toy_ModuleBundle* bundle, unsigned char* ptr, unsigned
|
||||
memcpy(bundle->ptr, ptr, size);
|
||||
bundle->count = size;
|
||||
|
||||
//URGENT: test this
|
||||
//TODO: test this
|
||||
int valid = validateModuleBundleHeader(bundle);
|
||||
|
||||
if (valid < 0) {
|
||||
|
||||
@@ -108,44 +108,76 @@ static void emitFloat(unsigned char** handle, unsigned int* capacity, unsigned i
|
||||
//simply get the address (always an integer)
|
||||
#define CURRENT_ADDRESS(mb, part) ((*mb)->part##Count)
|
||||
|
||||
static void emitToJumpTable(Toy_ModuleCompiler** mb, unsigned int startAddr) {
|
||||
EMIT_INT(mb, code, (*mb)->jumpsCount); //mark the jump index in the code
|
||||
EMIT_INT(mb, jumps, startAddr); //save address at the jump index
|
||||
//Cached write to data, enabling string reuse, see #168
//
//Searches the data section for an entry equal to 'cstr'; if one is found, its offset is returned
//and nothing is written. Otherwise 'cstr' (including its null terminator) is appended, rounded up
//to a 4-byte boundary, and the offset of the new entry is returned.
//
//  dataHandle - handle to the data buffer; passed on to expand(), which may change the pointer
//  capacity   - current capacity of the buffer, passed on to expand()
//  count      - bytes currently used in the buffer; advanced by the padded length on append
//  cstr       - null-terminated string to store
//
//NOTE: assumes data only ever holds null-terminated strings, each starting on a 4-byte boundary
static unsigned int emitCStringToData(unsigned char** dataHandle, unsigned int* capacity, unsigned int* count, const char* cstr) {
	const unsigned int slen = (unsigned int)strlen(cstr) + 1; //+1 for null
	const unsigned int padded = (slen + 3) & ~3u; //4-byte aligned footprint of the entry

	//See if the string already exists in the data
	unsigned int pos = 0;
	while (pos < *count) {
		const char* entry = ((const char*)(*dataHandle)) + pos;

		//bound the terminator search to the used region, so a malformed tail can't be over-read
		const char* terminator = memchr(entry, '\0', *count - pos);
		if (terminator == NULL) {
			break; //no complete entry remains, fall through to the append
		}
		const unsigned int elen = (unsigned int)(terminator - entry) + 1; //+1 for null

		//compare (equal lengths including the null, so a byte-wise compare is exact)
		if (slen == elen && memcmp(cstr, entry, slen) == 0) {
			return pos;
		}

		//next entry, skipping the alignment padding
		pos += (elen + 3) & ~3u;
	}

	//default, append the new entry
	const unsigned int addr = *count; //save the target address
	expand(dataHandle, capacity, count, padded);

	//expand() is defined elsewhere and may not zero-fill, so clear the whole slot first;
	//otherwise the padding bytes after the terminator would hold whatever was in memory
	memset((*dataHandle) + addr, 0, padded);
	memcpy((*dataHandle) + addr, cstr, slen);
	*count += padded;

	return addr; //return the address of the string in the data section
}
|
||||
|
||||
//Emits a string into the module being compiled.
//
//The string's characters are stored in the data section via emitCStringToData(), which returns
//the address of an existing identical entry instead of writing a duplicate. The jump table is
//then searched for that address: if an entry already holds it, that entry's index is written to
//the code; otherwise a new jump entry is appended and its index is written to the code.
//
//  mb  - handle to the module compiler whose data, jumps and code sections are written
//  str - the string to emit; node, leaf and name strings are handled
//
//Always returns 1.
static unsigned int emitString(Toy_ModuleCompiler** mb, Toy_String* str) {
	//the address within the data section
	unsigned int dataAddr = 0;

	//store the characters exactly once; emitCStringToData() does the sizing, alignment and append,
	//so no separate expand/memcpy/dataCount bookkeeping belongs here
	if (str->info.type == TOY_STRING_NODE) {
		char* buffer = Toy_getStringRawBuffer(str);
		dataAddr = emitCStringToData(&(*mb)->data, &(*mb)->dataCapacity, &(*mb)->dataCount, buffer);
		free(buffer); //the raw buffer is a temporary copy, released once it has been emitted
	}
	else if (str->info.type == TOY_STRING_LEAF) {
		dataAddr = emitCStringToData(&(*mb)->data, &(*mb)->dataCapacity, &(*mb)->dataCount, str->leaf.data);
	}
	else if (str->info.type == TOY_STRING_NAME) {
		dataAddr = emitCStringToData(&(*mb)->data, &(*mb)->dataCapacity, &(*mb)->dataCount, str->name.data);
	}

	//mark the position within the jump index, reusing an existing entry if it exists
	//NOTE(review): this steps 'i' by one up to jumpsCount and compares jumps[i] directly - confirm
	//that this stride matches the element width of 'jumps' and the unit jumpsCount is counted in
	for (unsigned int i = 0; i < (*mb)->jumpsCount; i++) {
		if ((*mb)->jumps[i] == dataAddr) {
			//reuse, and finish
			EMIT_INT(mb, code, i);
			return 1;
		}
	}

	//no existing entry points at this address, so add one
	EMIT_INT(mb, code, (*mb)->jumpsCount); //mark the new jump index in the code
	EMIT_INT(mb, jumps, dataAddr); //append to the jump table

	return 1;
}
|
||||
|
||||
// static unsigned int emitParameter(Toy_ModuleCompiler** mb, Toy_String* str) {
|
||||
//
|
||||
// }
|
||||
|
||||
static unsigned int writeModuleCompilerCode(Toy_ModuleCompiler** mb, Toy_Ast* ast); //forward declare for recursion
|
||||
static unsigned int writeInstructionAssign(Toy_ModuleCompiler** mb, Toy_AstVarAssign ast, bool chainedAssignment); //forward declare for chaining of var declarations
|
||||
|
||||
|
||||
@@ -1309,6 +1309,90 @@ int test_compiler_keywords(Toy_Bucket** bucketHandle) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
//Compiles a script that declares two variables initialised with the same string literal, then
//checks the module header and the emitted code against the expected layout, in which both reads
//of the literal refer to jump index 0.
//
//  bucketHandle - bucket handed to the parser for the AST
//
//Returns 0 when the module matches, or -1 (after reporting to stderr) when it does not.
int test_compiler_string_reuse(Toy_Bucket** bucketHandle) {
	//test string literals
	{
		//setup
		const char* source = "var first: string = \"Hello world\"; var second: string = \"Hello world\";";
		Toy_Lexer lexer;
		Toy_Parser parser;

		Toy_bindLexer(&lexer, source);
		Toy_bindParser(&parser, &lexer);
		Toy_Ast* ast = Toy_scanParser(bucketHandle, &parser);

		//run
		unsigned char* buffer = Toy_compileModule(ast);

		//check header, read as consecutive ints from the start of the module
		const int* header = (const int*)buffer;

		bool headerMatches =
			header[0] == 108 && //total size
			header[1] == 12 && //jump count
			header[2] == 0 && //param count
			header[3] == 28 && //data count
			header[4] == 0 && //subs count
			header[5] == 32 && //code addr
			header[6] == 68 && //jumps addr
			header[7] == 80; //data addr

		if (!headerMatches) {
			fprintf(stderr, TOY_CC_ERROR "ERROR: failed to reuse strings in module header, source: %s\n" TOY_CC_RESET, source);

			//cleanup and return
			free(buffer);
			return -1;
		}

		//check code, which begins at the code addr (32) verified above
		bool codeMatches =
			//read the literal for 'first'
			buffer[32] == TOY_OPCODE_READ &&
			buffer[33] == TOY_VALUE_STRING &&
			buffer[34] == TOY_STRING_LEAF &&
			buffer[35] == 0 &&

			*((unsigned int*)(buffer + 36)) == 0 &&

			//declare 'first'
			buffer[40] == TOY_OPCODE_DECLARE &&
			buffer[41] == TOY_VALUE_STRING &&
			buffer[42] == 5 &&
			buffer[43] == 0 &&

			*((unsigned int*)(buffer + 44)) == 4 &&

			//read the literal for 'second'
			buffer[48] == TOY_OPCODE_READ &&
			buffer[49] == TOY_VALUE_STRING &&
			buffer[50] == TOY_STRING_LEAF &&
			buffer[51] == 0 &&

			*((unsigned int*)(buffer + 52)) == 0 && //duplicate, same jump index as the first read

			//declare 'second'
			buffer[56] == TOY_OPCODE_DECLARE &&
			buffer[57] == TOY_VALUE_STRING &&
			buffer[58] == 6 &&
			buffer[59] == 0 &&

			*((unsigned int*)(buffer + 60)) == 8 &&

			//return
			buffer[64] == TOY_OPCODE_RETURN &&
			buffer[65] == 0 &&
			buffer[66] == 0 &&
			buffer[67] == 0;

		if (!codeMatches) {
			fprintf(stderr, TOY_CC_ERROR "ERROR: failed to produce the expected module code, source: %s\n" TOY_CC_RESET, source);

			//cleanup and return
			free(buffer);
			return -1;
		}

		//cleanup
		free(buffer);
	}

	return 0;
}
|
||||
|
||||
int main(void) {
|
||||
//run each test set, returning the total errors given
|
||||
int total = 0, res = 0;
|
||||
@@ -1343,5 +1427,15 @@ int main(void) {
|
||||
total += res;
|
||||
}
|
||||
|
||||
{
|
||||
Toy_Bucket* bucket = Toy_allocateBucket(TOY_BUCKET_IDEAL);
|
||||
res = test_compiler_string_reuse(&bucket);
|
||||
Toy_freeBucket(&bucket);
|
||||
if (res == 0) {
|
||||
printf(TOY_CC_NOTICE "All good\n" TOY_CC_RESET);
|
||||
}
|
||||
total += res;
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user