From 7b1dbf25ff568f21839ec3ee78a12dadf109b7ad Mon Sep 17 00:00:00 2001
From: Kayne Ruse
Date: Sat, 12 Oct 2024 20:20:19 +1100
Subject: [PATCH] Toy_String now fragments strings that are too long

---
Review notes (this section sits between the three-dash line and the diffstat; it is not part of the commit message):

* Purpose: TOY_STRING_MAX_LENGTH is removed. Toy_createStringLength() now splits a
  cstring that does not fit into one bucket partition into several leaf strings and
  joins them with Toy_concatStrings(). Toy_deepCopyString() reuses that path for
  oversized copies. Name strings are never split; they still abort if they do not fit.
* Fix applied during review (single added line changed in place, so hunk line counts
  and the diffstat are unchanged): the fragment stride "capacity - sizeof(Toy_String) - 1"
  is unsigned arithmetic. With capacity == sizeof(Toy_String) + 1 the stride is 0, which
  returned NULL for an empty string and never advanced the loop for any other string.
  Buckets that cannot hold at least one character of payload are now routed to
  partitionStringLength(), which succeeds only for an exact fit and otherwise exits with
  its existing error message. Strings that fit exactly now take the single-partition path;
  the resulting leaf is the same as before.
* Fixed the comment typo "charaters" in the new test.
* NOTE(review): Toy_deepCopyString() turns an oversized name string into a leaf/node
  string via Toy_createStringLength() - confirm that is intended.
* NOTE(review): this patch text was recovered from a whitespace-collapsed copy. Tab
  indentation, blank-line placement and the "<string.h>" include target in the first
  toy_string.c hunk are reconstructed assumptions - confirm against the tree (or apply
  with whitespace checks relaxed). The commit id above and the "index" lines describe
  the pre-review blobs.

 source/toy_parser.c       |  5 ---
 source/toy_string.c       | 74 ++++++++++++++++++++++++++-------------
 source/toy_string.h       |  9 ++---
 tests/cases/test_string.c | 38 ++++++++++++++++++++
 4 files changed, 91 insertions(+), 35 deletions(-)

diff --git a/source/toy_parser.c b/source/toy_parser.c
index 5ef5d52..de5df43 100644
--- a/source/toy_parser.c
+++ b/source/toy_parser.c
@@ -608,11 +608,6 @@ void Toy_bindParser(Toy_Parser* parser, Toy_Lexer* lexer) {
 Toy_Ast* Toy_scanParser(Toy_Bucket** bucketHandle, Toy_Parser* parser) {
 	Toy_Ast* rootHandle = NULL;
 
-	//double check bucket capacity for strings
-	if ((*bucketHandle)->capacity < TOY_STRING_MAX_LENGTH) {
-		fprintf(stderr, TOY_CC_WARN "WARNING: Bucket capacity in Toy_scanParser() is smaller than TOY_STRING_MAX_LENGTH" TOY_CC_RESET);
-	}
-
 	//check for EOF
 	if (match(parser, TOY_TOKEN_EOF)) {
 		Toy_private_emitAstEnd(bucketHandle, &rootHandle);
diff --git a/source/toy_string.c b/source/toy_string.c
index 147bd20..a1a27a0 100644
--- a/source/toy_string.c
+++ b/source/toy_string.c
@@ -6,6 +6,8 @@
 #include <string.h>
 
 //utils
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+
 static void deepCopyUtil(char* dest, Toy_String* str) {
 	//sometimes, "clever" can be a bad thing...
 	if (str->type == TOY_STRING_NODE) {
@@ -45,20 +47,13 @@ static unsigned int hashCString(const char* string) {
 	return hash;
 }
 
-//exposed functions
-Toy_String* Toy_createString(Toy_Bucket** bucketHandle, const char* cstring) {
-	int length = strlen(cstring);
-
-	return Toy_createStringLength(bucketHandle, cstring, length);
-}
-
-Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, int length) {
-	if (length > TOY_STRING_MAX_LENGTH) {
-		fprintf(stderr, TOY_CC_ERROR "ERROR: Can't create a string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
+static Toy_String* partitionStringLength(Toy_Bucket** bucketHandle, const char* cstring, unsigned int length) {
+	if (sizeof(Toy_String) + length + 1 > (*bucketHandle)->capacity) {
+		fprintf(stderr, TOY_CC_ERROR "ERROR: Can't partition enough space for a string, requested %d length (%d total) but buckets have a capacity of %d\n" TOY_CC_RESET, (int)length, (int)(sizeof(Toy_String) + length + 1), (int)((*bucketHandle)->capacity));
 		exit(-1);
 	}
 
-	Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1); //TODO: compensate for partitioning more space than bucket capacity
+	Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1);
 
 	ret->type = TOY_STRING_LEAF;
 	ret->length = length;
@@ -70,15 +65,42 @@ Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstrin
 	return ret;
 }
 
-TOY_API Toy_String* Toy_createNameString(Toy_Bucket** bucketHandle, const char* cname, Toy_ValueType type) {
-	int length = strlen(cname);
+//exposed functions
+Toy_String* Toy_createString(Toy_Bucket** bucketHandle, const char* cstring) {
+	unsigned int length = strlen(cstring);
 
-	if (length > TOY_STRING_MAX_LENGTH) {
-		fprintf(stderr, TOY_CC_ERROR "ERROR: Can't create a name string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
+	return Toy_createStringLength(bucketHandle, cstring, length);
+}
+
+Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, unsigned int length) {
+	//normal behaviour: the string fits in one partition; buckets with no room for any payload also go here, so the helper reports the error instead of looping with a zero stride
+	if ((*bucketHandle)->capacity <= sizeof(Toy_String) + 1 || length <= (*bucketHandle)->capacity - sizeof(Toy_String) - 1) {
+		return partitionStringLength(bucketHandle, cstring, length);
+	}
+
+	//break the string up if it's too long
+	Toy_String* result = NULL;
+
+	for (unsigned int i = 0; i < length; i += (*bucketHandle)->capacity - sizeof(Toy_String) - 1) { //increment by the amount actually used by the cstring
+		unsigned int amount = MIN((length - i), (*bucketHandle)->capacity - sizeof(Toy_String) - 1);
+		Toy_String* fragment = partitionStringLength(bucketHandle, cstring + i, amount);
+
+		result = result == NULL ? fragment : Toy_concatStrings(bucketHandle, result, fragment);
+	}
+
+	return result;
+}
+
+TOY_API Toy_String* Toy_createNameString(Toy_Bucket** bucketHandle, const char* cname, Toy_ValueType type) {
+	unsigned int length = strlen(cname);
+
+	//name strings can't be broken up
+	if (sizeof(Toy_String) + length + 1 > (*bucketHandle)->capacity) {
+		fprintf(stderr, TOY_CC_ERROR "ERROR: Can't partition enough space for a name string, requested %d length (%d total) but buckets have a capacity of %d\n" TOY_CC_RESET, (int)length, (int)(sizeof(Toy_String) + length + 1), (int)((*bucketHandle)->capacity));
 		exit(-1);
 	}
 
-	Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1); //TODO: compensate for partitioning more space than bucket capacity
+	Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1);
 
 	ret->type = TOY_STRING_NAME;
 	ret->length = length;
@@ -105,7 +127,16 @@ Toy_String* Toy_deepCopyString(Toy_Bucket** bucketHandle, Toy_String* str) {
 		fprintf(stderr, TOY_CC_ERROR "ERROR: Can't deep copy a string with refcount of zero\n" TOY_CC_RESET);
 		exit(-1);
 	}
-	Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + str->length + 1); //TODO: compensate for partitioning more space than bucket capacity
+
+	//handle deep copies of strings that are too long for the bucket capacity NOTE: slow, could replace this at some point
+	if (sizeof(Toy_String) + str->length + 1 > (*bucketHandle)->capacity) {
+		char* buffer = Toy_getStringRawBuffer(str);
+		Toy_String* result = Toy_createStringLength(bucketHandle, buffer, str->length); //handles the fragmenting
+		free(buffer);
+		return result;
+	}
+
+	Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + str->length + 1);
 
 	if (str->type == TOY_STRING_NODE || str->type == TOY_STRING_LEAF) {
 		ret->type = TOY_STRING_LEAF;
@@ -138,11 +169,6 @@ Toy_String* Toy_concatStrings(Toy_Bucket** bucketHandle, Toy_String* left, Toy_S
 		exit(-1);
 	}
 
-	if (left->length + right->length > TOY_STRING_MAX_LENGTH) {
-		fprintf(stderr, TOY_CC_ERROR "ERROR: Can't concat a string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
-		exit(-1);
-	}
-
 	Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String));
 
 	ret->type = TOY_STRING_NODE;
@@ -162,11 +188,11 @@ void Toy_freeString(Toy_String* str) {
 	decrementRefCount(str); //TODO: tool for checking the bucket is empty, and freeing it
 }
 
-int Toy_getStringLength(Toy_String* str) {
+unsigned int Toy_getStringLength(Toy_String* str) {
 	return str->length;
 }
 
-int Toy_getStringRefCount(Toy_String* str) {
+unsigned int Toy_getStringRefCount(Toy_String* str) {
 	return str->refCount;
 }
 
diff --git a/source/toy_string.h b/source/toy_string.h
index 61d208d..701a7a9 100644
--- a/source/toy_string.h
+++ b/source/toy_string.h
@@ -5,9 +5,6 @@
 #include "toy_bucket.h"
 #include "toy_value.h"
 
-//TODO: Remove this (related to partitioning more space in a bucket issue)
-#define TOY_STRING_MAX_LENGTH 1000
-
 //rope pattern
 typedef struct Toy_String { //32 | 64 BITNESS
 	enum Toy_StringType {
@@ -39,7 +36,7 @@ typedef struct Toy_String { //32 | 64 BITNESS
 } Toy_String; //24 | 32
 
 TOY_API Toy_String* Toy_createString(Toy_Bucket** bucketHandle, const char* cstring);
-TOY_API Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, int length);
+TOY_API Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, unsigned int length);
 
 TOY_API Toy_String* Toy_createNameString(Toy_Bucket** bucketHandle, const char* cname, Toy_ValueType type); //for variable names
 
@@ -50,8 +47,8 @@ TOY_API Toy_String* Toy_concatStrings(Toy_Bucket** bucketHandle, Toy_String* lef
 
 TOY_API void Toy_freeString(Toy_String* str);
 
-TOY_API int Toy_getStringLength(Toy_String* str);
-TOY_API int Toy_getStringRefCount(Toy_String* str);
+TOY_API unsigned int Toy_getStringLength(Toy_String* str);
+TOY_API unsigned int Toy_getStringRefCount(Toy_String* str);
 
 TOY_API char* Toy_getStringRawBuffer(Toy_String* str); //allocates the buffer on the heap, needs to be freed
 
diff --git a/tests/cases/test_string.c b/tests/cases/test_string.c
index 004d866..02920ce 100644
--- a/tests/cases/test_string.c
+++ b/tests/cases/test_string.c
@@ -792,6 +792,36 @@ int test_string_diffs() {
 	return 0;
 }
 
+int test_string_fragmenting() {
+	//allocate a long string
+	{
+		//setup
+		Toy_Bucket* bucket = Toy_allocateBucket(128); //deliberately too small for the cstring
+
+		//445 characters
+		const char* cstring = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
+
+		Toy_String* str = Toy_createString(&bucket, cstring);
+
+		//check
+		if (str->type != TOY_STRING_NODE ||
+			str->length != 445 ||
+			str->refCount != 1)
+		{
+			fprintf(stderr, TOY_CC_ERROR "ERROR: Failed to fragment a string within Toy_String\n" TOY_CC_RESET);
+			Toy_freeString(str);
+			Toy_freeBucket(&bucket);
+			return -1;
+		}
+
+		//cleanup
+		Toy_freeString(str);
+		Toy_freeBucket(&bucket);
+	}
+
+	return 0;
+}
+
 int main() {
 	//run each test set, returning the total errors given
 	int total = 0, res = 0;
@@ -848,5 +878,13 @@ int main() {
 		total += res;
 	}
 
+	{
+		res = test_string_fragmenting();
+		if (res == 0) {
+			printf(TOY_CC_NOTICE "All good\n" TOY_CC_RESET);
+		}
+		total += res;
+	}
+
 	return total;
 }