Files
Toy/source/toy_string.c
T
Ratstail91 b718b35097 FIX: Substrings had corner-cases with incorrect results, read more
I found this while writing unit tests for Toy_Function, where one
(native) function was named 'identity' and another (custom) was named
'ident' to avoid a naming clash. The rename didn't resolve the clash, so
after some digging, I found that strings compared to substrings would
return a match, despite being different.

This took some awkward corner-case handling, as it turns out
'deepCompareUtil' only returns zero when no differences have been found,
not when a match has been found. I also added checks for this to
Toy_String's unit test, with the parameters checked in both orders i.e.
(a,b) and (b,a), because paranoia is your friend.

The rope pattern is powerful, but also gives you enough rope to hang
yourself.
2026-04-26 22:52:24 +10:00

259 lines
7.4 KiB
C

#include "toy_string.h"
#include "toy_console_colors.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//utils
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
static void incrementRefCount(Toy_String* str) {
str->info.refCount++;
if (str->info.type == TOY_STRING_NODE) {
incrementRefCount(str->node.left);
incrementRefCount(str->node.right);
}
}
static void decrementRefCount(Toy_String* str) {
str->info.refCount--;
if (str->info.type == TOY_STRING_NODE) {
decrementRefCount(str->node.left);
decrementRefCount(str->node.right);
}
}
//exposed functions
Toy_String* Toy_toString(Toy_Bucket** bucketHandle, const char* cstring) {
return Toy_toStringLength(bucketHandle, cstring, strlen(cstring));
}
Toy_String* Toy_toStringLength(Toy_Bucket** bucketHandle, const char* cstring, unsigned int length) {
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String));
ret->info.type = TOY_STRING_LEAF;
ret->info.length = length;
ret->info.refCount = 1;
ret->info.cachedHash = 0; //don't calc until needed
ret->leaf.data = cstring; //don't make a local copy
return ret;
}
Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, unsigned int length) {
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String));
if (length > 0) {
ret->leaf.data = (char*)Toy_partitionBucket(bucketHandle, length + 1);
strncpy((char*)(ret->leaf.data), cstring, length);
((char*)(ret->leaf.data))[length] = '\0'; //don't forget the null
ret->info.length = length;
}
else {
ret->leaf.data = "";
ret->info.length = length;
}
ret->info.type = TOY_STRING_LEAF;
ret->info.refCount = 1;
ret->info.cachedHash = 0; //don't calc until needed
return ret;
}
Toy_String* Toy_copyString(Toy_String* str) {
assert(str->info.refCount != 0 && "Can't copy a string with refcount of zero");
incrementRefCount(str);
return str;
}
Toy_String* Toy_concatStrings(Toy_Bucket** bucketHandle, Toy_String* left, Toy_String* right) {
assert(left->info.refCount != 0 && right->info.refCount != 0 && "Can't concatenate a string with a refcount of zero");
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String));
ret->info.type = TOY_STRING_NODE;
ret->info.length = left->info.length + right->info.length;
ret->info.refCount = 1;
ret->info.cachedHash = 0; //don't calc until needed
ret->node.left = left;
ret->node.right = right;
incrementRefCount(left);
incrementRefCount(right);
return ret;
}
void Toy_freeString(Toy_String* str) {
assert(str->info.refCount > 0 && "Can't free a string with refcount 0");
//memory is freed when the bucket is
decrementRefCount(str);
}
unsigned int Toy_getStringLength(Toy_String* str) {
return str->info.length;
}
unsigned int Toy_getStringRefCount(Toy_String* str) {
return str->info.refCount;
}
static void getStringRawUtil(char* dest, Toy_String* str) {
//sometimes, "clever" can be a bad thing...
if (str->info.type == TOY_STRING_NODE) {
getStringRawUtil(dest, str->node.left);
getStringRawUtil(dest + str->node.left->info.length, str->node.right);
}
else {
memcpy(dest, str->leaf.data, str->info.length);
}
}
char* Toy_getStringRaw(Toy_String* str) {
assert(str->info.refCount != 0 && "Can't build a raw string from a string with refcount of zero");
//BUGFIX: Make sure it's aligned, and there's space for the null
unsigned int len = (str->info.length + 3) & ~3;
if (len == str->info.length) { //nulls aren't counted in a string's length
len += 4;
}
char* buffer = malloc(len);
getStringRawUtil(buffer, str);
buffer[str->info.length] = '\0';
return buffer;
}
static int deepCompareUtil(Toy_String* left, Toy_String* right, const char** leftHead, const char** rightHead) {
//NOTE: this function can't handle strings of zero length
int result = 0;
//if it's the same object, of course they match
if (left == right) {
return result;
}
//BUGFIX: if we're not currently iterating through the left leaf (and leftHead is not null), skip out
if (left->info.type == TOY_STRING_LEAF && (*leftHead) != NULL && (**leftHead) != '\0' && ((*leftHead) < left->leaf.data || (*leftHead) > (left->leaf.data + left->info.length)) ) {
return result;
}
//BUGFIX: if we're not currently iterating through the right leaf (and rightHead is not null), skip out
if (right->info.type == TOY_STRING_LEAF && (*rightHead) != NULL && (**rightHead) != '\0' && ((*rightHead) < right->leaf.data || (*rightHead) > (right->leaf.data + right->info.length)) ) {
return result;
}
//dig into left
if (left->info.type == TOY_STRING_NODE) {
if ((result = deepCompareUtil(left->node.left, right, leftHead, rightHead)) != 0) {
return result;
}
if ((result = deepCompareUtil(left->node.right, right, leftHead, rightHead)) != 0) {
return result;
}
//return zero to keep going
return result;
}
//dig into right
if (right->info.type == TOY_STRING_NODE) {
if ((result = deepCompareUtil(left, right->node.left, leftHead, rightHead)) != 0) {
return result;
}
if ((result = deepCompareUtil(left, right->node.right, leftHead, rightHead)) != 0) {
return result;
}
//return zero to keep going
return result;
}
//keep comparing the leaves
if (left->info.type == TOY_STRING_LEAF && right->info.type == TOY_STRING_LEAF) {
//initial head states can be null, or null characters
if ((*leftHead) == NULL || (**leftHead) == '\0') {
(*leftHead) = left->leaf.data;
}
if ((*rightHead) == NULL || (**rightHead) == '\0') {
(*rightHead) = right->leaf.data;
}
//compare and increment
while (**leftHead && (**leftHead == **rightHead)) {
(*leftHead)++;
(*rightHead)++;
}
//if there's a difference, and neither is null, than a result has (probably) been found
if ((**leftHead != '\0' && **rightHead != '\0' && (**leftHead != **rightHead))) {
result = *(const unsigned char*)(*leftHead) - *(const unsigned char*)(*rightHead);
}
}
//returning 0 means no difference found yet
return result;
}
int Toy_compareStrings(Toy_String* left, Toy_String* right) {
//BUGFIX: since deepCompareUtil() can't handle strings of length zero, insert a check here
if (left->info.length == 0 || right->info.length == 0) {
return left->info.length - right->info.length;
}
//BUGFIX: If both args are leaves, and one is a substring of the other, then deepCompareUtil() will return a wrong result
if (left->info.type == TOY_STRING_LEAF && right->info.type == TOY_STRING_LEAF) {
unsigned int maxLength = left->info.length > right->info.length ? left->info.length : right->info.length;
return strncmp(left->leaf.data, right->leaf.data, maxLength);
}
//util pointers
const char* leftHead = NULL;
const char* rightHead = NULL;
int result = deepCompareUtil(left, right, &leftHead, &rightHead);
//BUGFIX: deepCompareUtil() doesn't handle substrings correctly
if (result == 0 && leftHead != NULL && rightHead != NULL) {
return (int)(*leftHead - *rightHead);
}
else {
return result;
}
}
static unsigned int hashCString(const char* string) {
unsigned int hash = 2166136261u;
for (unsigned int i = 0; string[i]; i++) {
hash *= string[i];
hash ^= 16777619;
}
return hash;
}
unsigned int Toy_hashString(Toy_String* str) {
if (str->info.cachedHash != 0) {
return str->info.cachedHash;
}
else if (str->info.type == TOY_STRING_NODE) {
char* buffer = Toy_getStringRaw(str);
str->info.cachedHash = hashCString(buffer);
free(buffer);
}
else {
str->info.cachedHash = hashCString(str->leaf.data);
}
return str->info.cachedHash;
}