Implemented 'Toy_Table' hashtable with robin hood algorithm, untested

This commit is contained in:
2024-10-03 16:33:47 +10:00
parent a0d616f412
commit 5cf2e70b7d
6 changed files with 266 additions and 2 deletions

View File

@@ -14,7 +14,7 @@ Toy_Array* Toy_resizeArray(Toy_Array* paramArray, unsigned int capacity) {
if (array == NULL) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Failed to allocate a 'Toy_Array' of %d capacity\n" TOY_CC_RESET, (int)capacity);
exit(1);
exit(-1);
}
array->capacity = capacity;

162
source/toy_table.c Normal file
View File

@@ -0,0 +1,162 @@
#include "toy_table.h"
#include "toy_console_colors.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//'count' actually tracks the number of values
#define MIN_CAPACITY 16
//utils
//Allocates a fresh table with 'newCapacity' empty slots. If 'oldTable' is not NULL, every occupied
//entry is re-inserted into the new table and 'oldTable' is freed.
//Returns the new table; prints an error and exits if the allocation fails.
static Toy_Table* adjustTableCapacity(Toy_Table* oldTable, unsigned int newCapacity) {
	//the header and the trailing entry array live in a single allocation
	Toy_Table* newTable = malloc(newCapacity * sizeof(Toy_TableEntry) + sizeof(Toy_Table));

	if (newTable == NULL) {
		fprintf(stderr, TOY_CC_ERROR "ERROR: Failed to allocate a 'Toy_Table' of %d capacity\n" TOY_CC_RESET, (int)newCapacity);
		exit(-1);
	}

	newTable->capacity = newCapacity;
	newTable->count = 0;
	newTable->minPsl = 0;
	newTable->maxPsl = 0;

	//unlike other structures, the empty space in a table needs to be null
	memset(newTable->data, 0, newTable->capacity * sizeof(Toy_TableEntry));

	if (oldTable == NULL) { //for initial allocations
		return newTable;
	}

	//for each occupied entry in the old table, copy it into the new table
	for (unsigned int i = 0; i < oldTable->capacity; i++) {
		//empty slots have a null key, which Toy_insertTable rejects as a bad key, so skip them
		if (TOY_VALUE_IS_NULL(oldTable->data[i].key)) {
			continue;
		}

		Toy_insertTable(&newTable, oldTable->data[i].key, oldTable->data[i].value);
	}

	//clean up and return
	free(oldTable);
	return newTable;
}
//exposed functions
//Creates an empty table with the minimum starting capacity; release it with Toy_freeTable.
Toy_Table* Toy_allocateTable() {
	//a NULL 'old table' means there is nothing to rehash, only a fresh allocation
	Toy_Table* freshTable = adjustTableCapacity(NULL, MIN_CAPACITY);
	return freshTable;
}
//Releases the single allocation that holds the table header and its entries.
void Toy_freeTable(Toy_Table* table) {
	//TODO: slip in a call to free the complex values here

	//the entries are part of the same allocation, so one free() covers everything
	free(table);
}
//Stores 'value' under 'key', using robin hood probing.
//If 'key' is already present its value is replaced instead of adding a second entry.
//'*table' may be replaced by a larger table, which is why a handle is passed.
//Null and boolean keys are rejected: an error is printed and the program exits.
void Toy_insertTable(Toy_Table** table, Toy_Value key, Toy_Value value) {
	if (TOY_VALUE_IS_NULL(key) || TOY_VALUE_IS_BOOLEAN(key)) { //TODO: disallow functions and opaques
		fprintf(stderr, TOY_CC_ERROR "ERROR: Bad table key\n" TOY_CC_RESET);
		exit(-1); //TODO: #127
	}

	//expand the capacity once the table is more than 75% full
	if ((*table)->capacity < (*table)->count * (1 / 0.75f)) {
		(*table) = adjustTableCapacity(*table, (*table)->capacity * 2);
	}

	//insert
	unsigned int probe = Toy_hashValue(key) % (*table)->capacity;
	Toy_TableEntry entry = (Toy_TableEntry){ .key = key, .value = value, .psl = 0 };
	bool placed = false; //true once the caller's key is stored and 'entry' carries a displaced entry instead

	while (true) {
		Toy_TableEntry* slot = &(*table)->data[probe];

		//if this spot is free, insert and return
		if (TOY_VALUE_IS_NULL(slot->key)) {
			*slot = entry;

			//a displaced entry was already counted, only a brand new key grows the table
			if (!placed) {
				(*table)->count++;
			}

			//TODO: benchmark the psl optimisation
			(*table)->minPsl = entry.psl < (*table)->minPsl ? entry.psl : (*table)->minPsl;
			(*table)->maxPsl = entry.psl > (*table)->maxPsl ? entry.psl : (*table)->maxPsl;

			return;
		}

		//the key is already present, so replace its value rather than adding a duplicate
		//(only relevant while the caller's own entry is still being carried)
		if (!placed && TOY_VALUE_IS_EQUAL(slot->key, key)) {
			slot->value = value;
			return;
		}

		//if the new entry is "poorer", insert it and shift the old one
		if (slot->psl < entry.psl) {
			Toy_TableEntry tmp = *slot;
			*slot = entry;

			//the stored entry may be the furthest from home seen so far
			(*table)->maxPsl = entry.psl > (*table)->maxPsl ? entry.psl : (*table)->maxPsl;

			entry = tmp;
			placed = true;
		}

		//adjust and continue
		probe = (probe + 1) % (*table)->capacity;
		entry.psl++;
	}
}
//Returns the value stored under 'key', or a null value if the key is not in the table.
//Null and boolean keys are rejected: an error is printed and the program exits.
Toy_Value Toy_lookupTableValue(Toy_Table** table, Toy_Value key) {
	if (TOY_VALUE_IS_NULL(key) || TOY_VALUE_IS_BOOLEAN(key)) { //TODO: disallow functions and opaques
		fprintf(stderr, TOY_CC_ERROR "ERROR: Bad table key\n" TOY_CC_RESET);
		exit(-1); //TODO: #127
	}

	//lookup
	unsigned int probe = Toy_hashValue(key) % (*table)->capacity;
	unsigned int counter = 0; //how far this search has travelled from the key's home slot

	while (true) {
		Toy_TableEntry* slot = &(*table)->data[probe];

		//an empty slot ends the cluster, so the key is absent
		if (TOY_VALUE_IS_NULL(slot->key)) {
			return TOY_VALUE_TO_NULL();
		}

		//found the entry
		if (TOY_VALUE_IS_EQUAL(slot->key, key)) {
			return slot->value;
		}

		//robin hood invariant: insertion would have displaced any entry that is closer to its home
		//than the key would be at this point, so meeting one means the key was never stored
		if (slot->psl < counter) {
			return TOY_VALUE_TO_NULL();
		}

		//adjust and continue
		probe = (probe + 1) % (*table)->capacity;
		counter++;
	}
}
//Removes the entry stored under 'key', if any, using backward-shift deletion (no tombstones are left).
//Does nothing when the key is not present.
//Null and boolean keys are rejected: an error is printed and the program exits.
void Toy_removeTableEntry(Toy_Table** table, Toy_Value key) {
	if (TOY_VALUE_IS_NULL(key) || TOY_VALUE_IS_BOOLEAN(key)) { //TODO: disallow functions and opaques
		fprintf(stderr, TOY_CC_ERROR "ERROR: Bad table key\n" TOY_CC_RESET);
		exit(-1); //TODO: #127
	}

	//lookup
	unsigned int probe = Toy_hashValue(key) % (*table)->capacity;
	unsigned int counter = 0; //how far this search has travelled from the key's home slot

	while (true) {
		//an empty slot ends the cluster, so there is nothing to remove
		if (TOY_VALUE_IS_NULL((*table)->data[probe].key)) {
			return;
		}

		//found the entry
		if (TOY_VALUE_IS_EQUAL((*table)->data[probe].key, key)) {
			break;
		}

		//an entry closer to its home than the key would be here means the key was never stored
		if ((*table)->data[probe].psl < counter) {
			return;
		}

		//adjust and continue
		probe = (probe + 1) % (*table)->capacity;
		counter++;
	}

	//shift the following entries back by one slot, so later lookups don't stop early at a gap
	unsigned int hole = probe; //the slot that is currently vacant

	while (true) {
		unsigned int next = (hole + 1) % (*table)->capacity;

		//stop at an empty slot, or at an entry that already sits in its home slot
		if (TOY_VALUE_IS_NULL((*table)->data[next].key) || (*table)->data[next].psl == 0) {
			break;
		}

		(*table)->data[hole] = (*table)->data[next];
		(*table)->data[hole].psl--; //it is now one step closer to home
		hole = next;
	}

	//finally, wipe the slot left vacant at the end of the shifted run
	(*table)->data[hole] = (Toy_TableEntry){ .key = TOY_VALUE_TO_NULL(), .value = TOY_VALUE_TO_NULL(), .psl = 0 };
	(*table)->count--;
}

27
source/toy_table.h Normal file
View File

@@ -0,0 +1,27 @@
#pragma once
#include "toy_common.h"
#include "toy_value.h"
//key-value entry, and probe sequence length - https://programming.guide/robin-hood-hashing.html
//a slot whose key is null is treated as empty by the table functions
typedef struct Toy_TableEntry { //32 | 64 BITNESS
Toy_Value key; //8 | 8
Toy_Value value; //8 | 8
unsigned int psl; //4 | 4 - starts at 0 in the slot the key's hash selects, +1 for every slot probed past it
} Toy_TableEntry; //20 | 20
//key-value table; the header and its 'capacity' entries are allocated as a single chunk
typedef struct Toy_Table { //32 | 64 BITNESS
unsigned int capacity; //4 | 4 - number of slots in 'data'
unsigned int count; //4 | 4 - incremented each time an insert fills an empty slot
unsigned int minPsl; //4 | 4 - smallest psl recorded by an insert (starts at 0)
unsigned int maxPsl; //4 | 4 - largest psl recorded by an insert
Toy_TableEntry data[]; //- | - flexible array member, the slots themselves
} Toy_Table; //16 | 16
//create an empty table; release it with Toy_freeTable
TOY_API Toy_Table* Toy_allocateTable();
//release a table and its entries (complex values held inside are not yet freed, see the TODO in the source)
TOY_API void Toy_freeTable(Toy_Table* table);
//store 'value' under 'key'; takes a handle because '*table' is replaced when the table grows
//null and boolean keys are rejected with an error and exit
TOY_API void Toy_insertTable(Toy_Table** table, Toy_Value key, Toy_Value value);
//fetch the value stored under 'key', or a null value if there is none
TOY_API Toy_Value Toy_lookupTableValue(Toy_Table** table, Toy_Value key);
//remove the entry stored under 'key'; returns without changes if the key is not found
TOY_API void Toy_removeTableEntry(Toy_Table** table, Toy_Value key);

View File

@@ -8,7 +8,7 @@ bool Toy_private_isTruthy(Toy_Value value) {
//null is an error
if (TOY_VALUE_IS_NULL(value)) {
fprintf(stderr, TOY_CC_ERROR "ERROR: 'null' is neither true nor false\n" TOY_CC_RESET);
exit(-1); //TODO: return false or exit()?
exit(-1); //TODO: #127
}
//only 'false' is falsy
@@ -62,3 +62,48 @@ bool Toy_private_isEqual(Toy_Value left, Toy_Value right) {
exit(-1);
}
}
//hash utils
//Hashes a NUL-terminated string with 32-bit FNV-1a: for every byte, XOR it into the hash,
//then multiply by the FNV prime. Returns the offset basis for an empty string.
static unsigned int hashCString(const char* string) {
	unsigned int hash = 2166136261u; //FNV offset basis

	for (unsigned int i = 0; string[i]; i++) {
		//go through unsigned char so bytes above 0x7f don't sign-extend on platforms where char is signed
		hash ^= (unsigned char)string[i];
		hash *= 16777619u; //FNV prime
	}

	return hash;
}
//Scrambles the bits of an unsigned integer: two rounds of "fold the high half into the low half,
//then multiply", followed by one last fold.
static unsigned int hashUInt(unsigned int x) {
	const unsigned int multiplier = 0x45d9f3b;

	for (int round = 0; round < 2; round++) {
		x ^= x >> 16;
		x *= multiplier;
	}

	x ^= x >> 16;
	return x;
}
//Returns a hash for 'value', for use as a table index.
//Null hashes to 0, booleans to 0 or 1, integers and floats are run through hashUInt.
//Every other type is not hashable yet: an error is printed and the program exits.
unsigned int Toy_hashValue(Toy_Value value) {
	switch(value.type) {
		case TOY_VALUE_NULL:
			return 0;

		case TOY_VALUE_BOOLEAN:
			return TOY_VALUE_AS_BOOLEAN(value) ? 1 : 0;

		case TOY_VALUE_INTEGER:
			return hashUInt((unsigned int)TOY_VALUE_AS_INTEGER(value));

		case TOY_VALUE_FLOAT: {
			//read the float's bits through a union; casting its address to int* breaks strict aliasing
			//NOTE(review): assumes the float payload is a 32-bit 'float', as the previous int* cast implied - confirm
			union { float number; unsigned int bits; } pun;
			pun.bits = 0;
			pun.number = TOY_VALUE_AS_FLOAT(value);

			//-0.0 compares equal to 0.0 under '==', so give both the same hash
			if (pun.number == 0.0f) {
				pun.number = 0.0f;
			}

			return hashUInt(pun.bits);
		}

		case TOY_VALUE_STRING:
		case TOY_VALUE_ARRAY:
		case TOY_VALUE_DICTIONARY:
		case TOY_VALUE_FUNCTION:
		case TOY_VALUE_OPAQUE:
		default:
			fprintf(stderr, TOY_CC_ERROR "ERROR: Can't hash an unknown type %d\n" TOY_CC_RESET, value.type);
			exit(-1);
	}
}

View File

@@ -56,3 +56,6 @@ TOY_API bool Toy_private_isTruthy(Toy_Value value);
#define TOY_VALUE_IS_EQUAL(left, right) Toy_private_isEqual(left, right)
TOY_API bool Toy_private_isEqual(Toy_Value left, Toy_Value right);
//hash a value for use as a table key; exits with an error for types that cannot be hashed
//NOTE(review): not marked TOY_API, unlike the declarations above - confirm whether it should be exported
unsigned int Toy_hashValue(Toy_Value value);