Toy_String now fragments strings that are too long

This commit is contained in:
2024-10-12 20:20:19 +11:00
parent c1d72adb71
commit 7b1dbf25ff
4 changed files with 91 additions and 35 deletions

View File

@@ -608,11 +608,6 @@ void Toy_bindParser(Toy_Parser* parser, Toy_Lexer* lexer) {
Toy_Ast* Toy_scanParser(Toy_Bucket** bucketHandle, Toy_Parser* parser) {
Toy_Ast* rootHandle = NULL;
//double check bucket capacity for strings
if ((*bucketHandle)->capacity < TOY_STRING_MAX_LENGTH) {
fprintf(stderr, TOY_CC_WARN "WARNING: Bucket capacity in Toy_scanParser() is smaller than TOY_STRING_MAX_LENGTH" TOY_CC_RESET);
}
//check for EOF
if (match(parser, TOY_TOKEN_EOF)) {
Toy_private_emitAstEnd(bucketHandle, &rootHandle);

View File

@@ -6,6 +6,8 @@
#include <string.h>
//utils
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
static void deepCopyUtil(char* dest, Toy_String* str) {
//sometimes, "clever" can be a bad thing...
if (str->type == TOY_STRING_NODE) {
@@ -45,20 +47,13 @@ static unsigned int hashCString(const char* string) {
return hash;
}
//exposed functions
Toy_String* Toy_createString(Toy_Bucket** bucketHandle, const char* cstring) {
int length = strlen(cstring);
return Toy_createStringLength(bucketHandle, cstring, length);
}
Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, int length) {
if (length > TOY_STRING_MAX_LENGTH) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't create a string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
static Toy_String* partitionStringLength(Toy_Bucket** bucketHandle, const char* cstring, unsigned int length) {
if (sizeof(Toy_String) + length + 1 > (*bucketHandle)->capacity) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't partition enough space for a string, requested %d length (%d total) but buckets have a capacity of %d\n" TOY_CC_RESET, (int)length, (int)(sizeof(Toy_String) + length + 1), (int)((*bucketHandle)->capacity));
exit(-1);
}
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1); //TODO: compensate for partitioning more space than bucket capacity
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1);
ret->type = TOY_STRING_LEAF;
ret->length = length;
@@ -70,15 +65,42 @@ Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstrin
return ret;
}
TOY_API Toy_String* Toy_createNameString(Toy_Bucket** bucketHandle, const char* cname, Toy_ValueType type) {
int length = strlen(cname);
//exposed functions
Toy_String* Toy_createString(Toy_Bucket** bucketHandle, const char* cstring) {
unsigned int length = strlen(cstring);
if (length > TOY_STRING_MAX_LENGTH) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't create a name string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
return Toy_createStringLength(bucketHandle, cstring, length);
}
//Creates a string from the first `length` characters of `cstring`, allocating from the bucket behind `bucketHandle`.
//When the characters fit in the bucket's capacity alongside a Toy_String header (plus one extra byte), a single leaf is returned.
//Otherwise the text is cut into capacity-sized leaves, which are joined left-to-right with Toy_concatStrings().
//Exits with an error if the bucket is too small for a fragment to carry even one character.
Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, unsigned int length) {
	//bytes each leaf needs on top of its characters: the header, plus one more (presumably the terminator)
	const size_t overhead = sizeof(Toy_String) + 1;
	const size_t capacity = (*bucketHandle)->capacity;

	//normal behaviour - check the capacity before subtracting, so a tiny bucket can't wrap the unsigned maths around
	if (capacity >= overhead && length <= capacity - overhead) {
		return partitionStringLength(bucketHandle, cstring, length);
	}

	//a fragment must hold at least one character, otherwise the loop below would never advance
	if (capacity <= overhead) {
		fprintf(stderr, TOY_CC_ERROR "ERROR: Can't fragment a string, buckets have a capacity of %d but each fragment needs more than %d\n" TOY_CC_RESET, (int)capacity, (int)overhead);
		exit(-1);
	}

	//break the string up if it's too long
	const size_t chunk = capacity - overhead;
	Toy_String* result = NULL;
	unsigned int offset = 0;

	while (offset < length) {
		const size_t remaining = length - offset;
		const unsigned int amount = (unsigned int)MIN(remaining, chunk); //never more than 'remaining', so it fits in an unsigned int

		//partitionStringLength() re-checks each fragment against the current bucket's capacity
		Toy_String* fragment = partitionStringLength(bucketHandle, cstring + offset, amount);
		result = result == NULL ? fragment : Toy_concatStrings(bucketHandle, result, fragment);

		offset += amount; //advance by the amount actually consumed from the cstring
	}

	return result;
}
TOY_API Toy_String* Toy_createNameString(Toy_Bucket** bucketHandle, const char* cname, Toy_ValueType type) {
unsigned int length = strlen(cname);
//name strings can't be broken up
if (sizeof(Toy_String) + length + 1 > (*bucketHandle)->capacity) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't partition enough space for a name string, requested %d length (%d total) but buckets have a capacity of %d\n" TOY_CC_RESET, (int)length, (int)(sizeof(Toy_String) + length + 1), (int)((*bucketHandle)->capacity));
exit(-1);
}
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1); //TODO: compensate for partitioning more space than bucket capacity
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1);
ret->type = TOY_STRING_NAME;
ret->length = length;
@@ -105,7 +127,16 @@ Toy_String* Toy_deepCopyString(Toy_Bucket** bucketHandle, Toy_String* str) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't deep copy a string with refcount of zero\n" TOY_CC_RESET);
exit(-1);
}
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + str->length + 1); //TODO: compensate for partitioning more space than bucket capacity
//handle deep copies of strings that are too long for the bucket capacity NOTE: slow, could replace this at some point
if (sizeof(Toy_String) + str->length + 1 > (*bucketHandle)->capacity) {
char* buffer = Toy_getStringRawBuffer(str);
Toy_String* result = Toy_createStringLength(bucketHandle, buffer, str->length); //handles the fragmenting
free(buffer);
return result;
}
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + str->length + 1);
if (str->type == TOY_STRING_NODE || str->type == TOY_STRING_LEAF) {
ret->type = TOY_STRING_LEAF;
@@ -138,11 +169,6 @@ Toy_String* Toy_concatStrings(Toy_Bucket** bucketHandle, Toy_String* left, Toy_S
exit(-1);
}
if (left->length + right->length > TOY_STRING_MAX_LENGTH) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't concat a string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
exit(-1);
}
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String));
ret->type = TOY_STRING_NODE;
@@ -162,11 +188,11 @@ void Toy_freeString(Toy_String* str) {
decrementRefCount(str); //TODO: tool for checking the bucket is empty, and freeing it
}
int Toy_getStringLength(Toy_String* str) {
//returns the 'length' field stored in the given string
unsigned int Toy_getStringLength(Toy_String* str) {
return str->length;
}
int Toy_getStringRefCount(Toy_String* str) {
//returns the 'refCount' field stored in the given string
unsigned int Toy_getStringRefCount(Toy_String* str) {
return str->refCount;
}

View File

@@ -5,9 +5,6 @@
#include "toy_bucket.h"
#include "toy_value.h"
//TODO: Remove this (related to partitioning more space in a bucket issue)
#define TOY_STRING_MAX_LENGTH 1000
//rope pattern
typedef struct Toy_String { //32 | 64 BITNESS
enum Toy_StringType {
@@ -39,7 +36,7 @@ typedef struct Toy_String { //32 | 64 BITNESS
} Toy_String; //24 | 32
TOY_API Toy_String* Toy_createString(Toy_Bucket** bucketHandle, const char* cstring);
TOY_API Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, int length);
TOY_API Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, unsigned int length);
TOY_API Toy_String* Toy_createNameString(Toy_Bucket** bucketHandle, const char* cname, Toy_ValueType type); //for variable names
@@ -50,8 +47,8 @@ TOY_API Toy_String* Toy_concatStrings(Toy_Bucket** bucketHandle, Toy_String* lef
TOY_API void Toy_freeString(Toy_String* str);
TOY_API int Toy_getStringLength(Toy_String* str);
TOY_API int Toy_getStringRefCount(Toy_String* str);
TOY_API unsigned int Toy_getStringLength(Toy_String* str);
TOY_API unsigned int Toy_getStringRefCount(Toy_String* str);
TOY_API char* Toy_getStringRawBuffer(Toy_String* str); //allocates the buffer on the heap, needs to be freed

View File

@@ -792,6 +792,36 @@ int test_string_diffs() {
return 0;
}
//checks that a cstring too long for the bucket comes back from Toy_createString() as a node, not a single leaf
//returns 0 on success, or -1 after printing an error
int test_string_fragmenting() {
	//allocate a long string
	{
		//setup - a capacity of 128 is deliberately too small for the cstring
		Toy_Bucket* bucket = Toy_allocateBucket(128);

		//445 characters
		const char* cstring = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";

		Toy_String* str = Toy_createString(&bucket, cstring);

		//check - expect a node covering the whole text, holding a single reference
		const int passed =
			str->type == TOY_STRING_NODE &&
			str->length == 445 &&
			str->refCount == 1;

		if (!passed) {
			fprintf(stderr, TOY_CC_ERROR "ERROR: Failed to fragment a string within Toy_String\n" TOY_CC_RESET);
		}

		//cleanup happens on both the passing and failing paths
		Toy_freeString(str);
		Toy_freeBucket(&bucket);

		if (!passed) {
			return -1;
		}
	}

	return 0;
}
int main() {
//run each test set, returning the total errors given
int total = 0, res = 0;
@@ -848,5 +878,13 @@ int main() {
total += res;
}
{
res = test_string_fragmenting();
if (res == 0) {
printf(TOY_CC_NOTICE "All good\n" TOY_CC_RESET);
}
total += res;
}
return total;
}