String literals are being parsed, compiled and printed, read more

Strings, due to their potentially large size, are stored outside of a routine's code section, in the data section. To access the correct string, you must read the jump index, then the real address from the jump table - an extra layer of indirection will result in more flexible data down the road, I hope. Other changes include: * Added string concat operator .. * Added TOY_STRING_MAX_LENGTH * Strings can't be created or concatenated longer than the max length * The parser will display a warning if the bucket is too small for a string at max length, but it will continue * Added TOY_BUCKET_IDEAL to correspond with max string length * The bucket now allocates an address that is 4-byte aligned * Fixed missing entries in the parser rule table * Corrected some failing TOY_BITNESS tests
2024-10-07 23:05:36 +11:00
parent 14653a303f
commit 4bcf8e84a9
23 changed files with 572 additions and 195 deletions
@@ -45,6 +45,7 @@ typedef enum Toy_AstFlag {
 	TOY_AST_FLAG_COMPARE_GREATER_EQUAL,
 	TOY_AST_FLAG_AND,
 	TOY_AST_FLAG_OR,
+	TOY_AST_FLAG_CONCAT,

 	//unary flags
 	TOY_AST_FLAG_NEGATE,
@@ -108,7 +109,7 @@ typedef struct Toy_AstEnd {
 union Toy_Ast {             //32 | 64 BITNESS
 	Toy_AstType type;       //4  | 4
 	Toy_AstBlock block;     //16 | 32
-	Toy_AstValue value;     //12 | 12
+	Toy_AstValue value;     //12 | 24
 	Toy_AstUnary unary;     //12 | 16
 	Toy_AstBinary binary;   //16 | 24
 	Toy_AstGroup group;     //8  | 16
@@ -32,6 +32,11 @@ void* Toy_partitionBucket(Toy_Bucket** bucketHandle, unsigned int amount) {
 		exit(1);
 	}

+	//BUGFIX: the endpoint must be aligned to the word size, otherwise you'll get a bus error from moving pointers
+	if (amount % 4 != 0) {
+		amount += 4 - (amount % 4); //ceil
+	}
+
 	//if you try to allocate too much space
 	if ((*bucketHandle)->capacity < amount) {
 		fprintf(stderr, TOY_CC_ERROR "ERROR: Failed to partition a 'Toy_Bucket': requested %d from a bucket of %d capacity\n" TOY_CC_RESET, (int)amount, (int)((*bucketHandle)->capacity));
@@ -20,3 +20,9 @@ TOY_API Toy_Bucket* Toy_allocateBucket(unsigned int capacity);
 TOY_API void* Toy_partitionBucket(Toy_Bucket** bucketHandle, unsigned int amount);
 TOY_API void Toy_freeBucket(Toy_Bucket** bucketHandle);

+//some useful bucket sizes
+#define TOY_BUCKET_SMALL 256
+#define TOY_BUCKET_MEDIUM 512
+#define TOY_BUCKET_LARGE 1024
+
+#define TOY_BUCKET_IDEAL 1024
@@ -35,6 +35,7 @@ typedef enum Toy_OpcodeType {

 	//various action instructions
 	TOY_OPCODE_PRINT,
+	TOY_OPCODE_CONCAT,
 	//TODO: clear the program stack

 	//meta instructions
@@ -1,6 +1,9 @@
 #include "toy_parser.h"
 #include "toy_console_colors.h"

+#include "toy_value.h"
+#include "toy_string.h"
+
 #include <stdio.h>

 //utilities
@@ -107,14 +110,14 @@ typedef struct ParsingTuple {

 static void parsePrecedence(Toy_Bucket** bucketHandle, Toy_Parser* parser, Toy_Ast** rootHandle, ParsingPrecedence precRule);

-static Toy_AstFlag atomic(Toy_Bucket** bucketHandle, Toy_Parser* parser, Toy_Ast** rootHandle);
+static Toy_AstFlag literal(Toy_Bucket** bucketHandle, Toy_Parser* parser, Toy_Ast** rootHandle);
 static Toy_AstFlag unary(Toy_Bucket** bucketHandle, Toy_Parser* parser, Toy_Ast** rootHandle);
 static Toy_AstFlag binary(Toy_Bucket** bucketHandle, Toy_Parser* parser, Toy_Ast** rootHandle);
 static Toy_AstFlag group(Toy_Bucket** bucketHandle, Toy_Parser* parser, Toy_Ast** rootHandle);

 //precedence definitions
 static ParsingTuple parsingRulesetTable[] = {
-	{PREC_PRIMARY,atomic,NULL},// TOY_TOKEN_NULL,
+	{PREC_PRIMARY,literal,NULL},// TOY_TOKEN_NULL,

 	//variable names
 	{PREC_NONE,NULL,NULL},// TOY_TOKEN_IDENTIFIER,
@@ -157,11 +160,11 @@ static ParsingTuple parsingRulesetTable[] = {
 	{PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_YIELD,

 	//literal values
-	{PREC_PRIMARY,atomic,NULL},// TOY_TOKEN_LITERAL_TRUE,
-	{PREC_PRIMARY,atomic,NULL},// TOY_TOKEN_LITERAL_FALSE,
-	{PREC_PRIMARY,atomic,NULL},// TOY_TOKEN_LITERAL_INTEGER,
-	{PREC_PRIMARY,atomic,NULL},// TOY_TOKEN_LITERAL_FLOAT,
-	{PREC_NONE,NULL,NULL},// TOY_TOKEN_LITERAL_STRING,
+	{PREC_PRIMARY,literal,NULL},// TOY_TOKEN_LITERAL_TRUE,
+	{PREC_PRIMARY,literal,NULL},// TOY_TOKEN_LITERAL_FALSE,
+	{PREC_PRIMARY,literal,NULL},// TOY_TOKEN_LITERAL_INTEGER,
+	{PREC_PRIMARY,literal,NULL},// TOY_TOKEN_LITERAL_FLOAT,
+	{PREC_PRIMARY,literal,NULL},// TOY_TOKEN_LITERAL_STRING,

 	//math operators
 	{PREC_TERM,NULL,binary},// TOY_TOKEN_OPERATOR_ADD,
@@ -201,7 +204,11 @@ static ParsingTuple parsingRulesetTable[] = {
 	{PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_QUESTION,
 	{PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_COLON,

-	{PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_CONCAT, // ..
+	{PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_SEMICOLON, // ;
+	{PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_COMMA, // ,
+
+	{PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_DOT, // .
+	{PREC_CALL,NULL,binary},// TOY_TOKEN_OPERATOR_CONCAT, // ..
 	{PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_REST, // ...

 	//unused operators
@@ -214,7 +221,7 @@ static ParsingTuple parsingRulesetTable[] = {
 	{PREC_NONE,NULL,NULL},// TOY_TOKEN_EOF,
 };

-static Toy_AstFlag atomic(Toy_Bucket** bucketHandle, Toy_Parser* parser, Toy_Ast** rootHandle) {
+static Toy_AstFlag literal(Toy_Bucket** bucketHandle, Toy_Parser* parser, Toy_Ast** rootHandle) {
 	switch(parser->previous.type) {
 		case TOY_TOKEN_NULL:
 			Toy_private_emitAstValue(bucketHandle, rootHandle, TOY_VALUE_FROM_NULL());
@@ -262,8 +269,40 @@ static Toy_AstFlag atomic(Toy_Bucket** bucketHandle, Toy_Parser* parser, Toy_Ast
 			return TOY_AST_FLAG_NONE;
 		}

+		case TOY_TOKEN_LITERAL_STRING: {
+			char buffer[parser->previous.length + 1];
+
+			unsigned int i = 0, o = 0;
+			do {
+				buffer[i] = parser->previous.lexeme[o];
+				if (buffer[i] == '\\' && parser->previous.lexeme[++o]) {
+					//also handle escape characters
+					switch(parser->previous.lexeme[o]) {
+						case 'n':
+							buffer[i] = '\n';
+							break;
+						case 't':
+							buffer[i] = '\t';
+							break;
+						case '\\':
+							buffer[i] = '\\';
+							break;
+						case '"':
+							buffer[i] = '"';
+							break;
+					}
+				}
+				i++;
+			} while (parser->previous.lexeme[o++] && i < parser->previous.length);
+
+			buffer[i] = '\0';
+			Toy_private_emitAstValue(bucketHandle, rootHandle, TOY_VALUE_FROM_STRING(Toy_createStringLength(bucketHandle, buffer, i)));
+
+			return TOY_AST_FLAG_NONE;
+		}
+
 		default:
-			printError(parser, parser->previous, "Unexpected token passed to atomic precedence rule");
+			printError(parser, parser->previous, "Unexpected token passed to literal precedence rule");
 			Toy_private_emitAstError(bucketHandle, rootHandle);
 			return TOY_AST_FLAG_NONE;
 	}
@@ -405,6 +444,11 @@ static Toy_AstFlag binary(Toy_Bucket** bucketHandle, Toy_Parser* parser, Toy_Ast
 			return TOY_AST_FLAG_COMPARE_GREATER_EQUAL;
 		}

+		case TOY_TOKEN_OPERATOR_CONCAT: {
+			parsePrecedence(bucketHandle, parser, rootHandle, PREC_CALL + 1);
+			return TOY_AST_FLAG_CONCAT;
+		}
+
 		default:
 			printError(parser, parser->previous, "Unexpected token passed to binary precedence rule");
 			Toy_private_emitAstError(bucketHandle, rootHandle);
@@ -574,6 +618,11 @@ void Toy_bindParser(Toy_Parser* parser, Toy_Lexer* lexer) {
 Toy_Ast* Toy_scanParser(Toy_Bucket** bucketHandle, Toy_Parser* parser) {
 	Toy_Ast* rootHandle = NULL;

+	//double check bucket capacity for strings
+	if ((*bucketHandle)->capacity < TOY_STRING_MAX_LENGTH) {
+		fprintf(stderr, TOY_CC_WARN "WARNING: Bucket capacity in Toy_scanParser() is smaller than TOY_STRING_MAX_LENGTH" TOY_CC_RESET);
+	}
+
 	//check for EOF
 	if (match(parser, TOY_TOKEN_EOF)) {
 		Toy_private_emitAstEnd(bucketHandle, &rootHandle);
@@ -3,6 +3,7 @@

 #include "toy_opcodes.h"
 #include "toy_value.h"
+#include "toy_string.h"

 #include <stdio.h>
 #include <stdlib.h>
@@ -17,7 +18,7 @@ static void expand(void** handle, unsigned int* capacity, unsigned int* count, u
 		(*handle) = realloc((*handle), (*capacity));

 		if ((*handle) == NULL) {
-			fprintf(stderr, TOY_CC_ERROR "ERROR: Failed to allocate a 'Toy_Routine' of %d capacity\n" TOY_CC_RESET, (int)(*capacity));
+			fprintf(stderr, TOY_CC_ERROR "ERROR: Failed to allocate %d space for a part of 'Toy_Routine'\n" TOY_CC_RESET, (int)(*capacity));
 			exit(1);
 		}
 	}
@@ -45,48 +46,91 @@ static void emitFloat(void** handle, unsigned int* capacity, unsigned int* count
 }

 //write instructions based on the AST types
-#define EMIT_BYTE(rt, byte) \
-	emitByte((void**)(&((*rt)->code)), &((*rt)->codeCapacity), &((*rt)->codeCount), byte);
-#define EMIT_INT(rt, code, byte) \
-	emitInt((void**)(&((*rt)->code)), &((*rt)->codeCapacity), &((*rt)->codeCount), byte);
-#define EMIT_FLOAT(rt, code, byte) \
-	emitFloat((void**)(&((*rt)->code)), &((*rt)->codeCapacity), &((*rt)->codeCount), byte);
+#define EMIT_BYTE(rt, part, byte) \
+	emitByte((void**)(&((*rt)->part)), &((*rt)->part##Capacity), &((*rt)->part##Count), byte);
+#define EMIT_INT(rt, part, bytes) \
+	emitInt((void**)(&((*rt)->part)), &((*rt)->part##Capacity), &((*rt)->part##Count), bytes);
+#define EMIT_FLOAT(rt, part, bytes) \
+	emitFloat((void**)(&((*rt)->part)), &((*rt)->part##Capacity), &((*rt)->part##Count), bytes);
+
+static void emitToJumpTable(Toy_Routine** rt, unsigned int startAddr) { //ties the code section to 'startAddr' through one new jump table entry
+	EMIT_INT(rt, code, (*rt)->jumpsCount); //the code gets the current jumpsCount, read before the next line grows the jump table
+	EMIT_INT(rt, jumps, startAddr); //the new jump table entry holds the address itself
+}
+
+//write 'str' into the data section as a NUL-terminated, zero-padded, 4-byte aligned slot, then register its start address in the jump table
+static void emitString(Toy_Routine** rt, Toy_String* str) {
+	//room for the terminator, rounded up for 4-byte alignment
+	unsigned int length = str->length + 1;
+	if (length % 4 != 0) {
+		length += 4 - (length % 4); //ceil
+	}
+
+	//the string begins where the data section currently ends
+	unsigned int startAddr = (*rt)->dataCount;
+
+	//BUGFIX: request only the extra bytes (matching the expand() calls in writeRoutine), and zero the slot so the terminator and padding are never indeterminate
+	expand((void**)(&((*rt)->data)), &((*rt)->dataCapacity), &((*rt)->dataCount), length);
+	memset((*rt)->data + startAddr, 0, length);
+
+	if (str->type == TOY_STRING_NODE) {
+		char* buffer = Toy_getStringRawBuffer(str); //temporary buffer, freed below
+		memcpy((*rt)->data + startAddr, buffer, str->length);
+		free(buffer);
+	}
+	else if (str->type == TOY_STRING_LEAF) {
+		memcpy((*rt)->data + startAddr, str->as.leaf.data, str->length);
+	}
+	else if (str->type == TOY_STRING_NAME) {
+		memcpy((*rt)->data + startAddr, str->as.name.data, str->length);
+	}
+
+	(*rt)->dataCount += length;
+	emitToJumpTable(rt, startAddr); //mark the jump position
+}

 static void writeRoutineCode(Toy_Routine** rt, Toy_Ast* ast); //forward declare for recursion

 static void writeInstructionValue(Toy_Routine** rt, Toy_AstValue ast) {
 	//TODO: store more complex values in the data code
-	EMIT_BYTE(rt, TOY_OPCODE_READ);
-	EMIT_BYTE(rt, ast.value.type);
+	EMIT_BYTE(rt, code, TOY_OPCODE_READ);
+	EMIT_BYTE(rt, code, ast.value.type);

 	//emit the raw value based on the type
 	if (TOY_VALUE_IS_NULL(ast.value)) {
 		//NOTHING - null's type data is enough

 		//4-byte alignment
-		EMIT_BYTE(rt, 0);
-		EMIT_BYTE(rt, 0);
+		EMIT_BYTE(rt, code, 0);
+		EMIT_BYTE(rt, code, 0);
 	}
 	else if (TOY_VALUE_IS_BOOLEAN(ast.value)) {
-		EMIT_BYTE(rt, TOY_VALUE_AS_BOOLEAN(ast.value));
+		EMIT_BYTE(rt, code, TOY_VALUE_AS_BOOLEAN(ast.value));

 		//4-byte alignment
-		EMIT_BYTE(rt, 0);
+		EMIT_BYTE(rt, code, 0);
 	}
 	else if (TOY_VALUE_IS_INTEGER(ast.value)) {
 		//4-byte alignment
-		EMIT_BYTE(rt, 0);
-		EMIT_BYTE(rt, 0);
+		EMIT_BYTE(rt, code, 0);
+		EMIT_BYTE(rt, code, 0);

 		EMIT_INT(rt, code, TOY_VALUE_AS_INTEGER(ast.value));
 	}
 	else if (TOY_VALUE_IS_FLOAT(ast.value)) {
 		//4-byte alignment
-		EMIT_BYTE(rt, 0);
-		EMIT_BYTE(rt, 0);
+		EMIT_BYTE(rt, code, 0);
+		EMIT_BYTE(rt, code, 0);

 		EMIT_FLOAT(rt, code, TOY_VALUE_AS_FLOAT(ast.value));
 	}
+	else if (TOY_VALUE_IS_STRING(ast.value)) {
+		//4-byte alignment
+		EMIT_BYTE(rt, code, 0);
+		EMIT_BYTE(rt, code, 0);
+
+		emitString(rt, TOY_VALUE_AS_STRING(ast.value));
+	}
 	else {
 		fprintf(stderr, TOY_CC_ERROR "ERROR: Invalid AST type found: Unknown value type\n" TOY_CC_RESET);
 		exit(-1);
@@ -98,12 +142,12 @@ static void writeInstructionUnary(Toy_Routine** rt, Toy_AstUnary ast) {
 	writeRoutineCode(rt, ast.child);

 	if (ast.flag == TOY_AST_FLAG_NEGATE) {
-		EMIT_BYTE(rt, TOY_OPCODE_NEGATE);
+		EMIT_BYTE(rt, code, TOY_OPCODE_NEGATE);

 		//4-byte alignment
-		EMIT_BYTE(rt, 0);
-		EMIT_BYTE(rt, 0);
-		EMIT_BYTE(rt, 0);
+		EMIT_BYTE(rt, code, 0);
+		EMIT_BYTE(rt, code, 0);
+		EMIT_BYTE(rt, code, 0);
 	}
 	else {
 		fprintf(stderr, TOY_CC_ERROR "ERROR: Invalid AST unary flag found\n" TOY_CC_RESET);
@@ -117,80 +161,83 @@ static void writeInstructionBinary(Toy_Routine** rt, Toy_AstBinary ast) {
 	writeRoutineCode(rt, ast.right);

 	if (ast.flag == TOY_AST_FLAG_ADD) {
-		EMIT_BYTE(rt, TOY_OPCODE_ADD);
+		EMIT_BYTE(rt, code,TOY_OPCODE_ADD);
 	}
 	else if (ast.flag == TOY_AST_FLAG_SUBTRACT) {
-		EMIT_BYTE(rt, TOY_OPCODE_SUBTRACT);
+		EMIT_BYTE(rt, code,TOY_OPCODE_SUBTRACT);
 	}
 	else if (ast.flag == TOY_AST_FLAG_MULTIPLY) {
-		EMIT_BYTE(rt, TOY_OPCODE_MULTIPLY);
+		EMIT_BYTE(rt, code,TOY_OPCODE_MULTIPLY);
 	}
 	else if (ast.flag == TOY_AST_FLAG_DIVIDE) {
-		EMIT_BYTE(rt, TOY_OPCODE_DIVIDE);
+		EMIT_BYTE(rt, code,TOY_OPCODE_DIVIDE);
 	}
 	else if (ast.flag == TOY_AST_FLAG_MODULO) {
-		EMIT_BYTE(rt, TOY_OPCODE_MODULO);
+		EMIT_BYTE(rt, code,TOY_OPCODE_MODULO);
 	}

 	// else if (ast.flag == TOY_AST_FLAG_ASSIGN) {
-	// 	EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
+	// 	EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
 	// 	//TODO: emit the env symbol to store TOP(S) within
 	// }
 	// else if (ast.flag == TOY_AST_FLAG_ADD_ASSIGN) {
-	// 	EMIT_BYTE(rt, TOY_OPCODE_ADD);
-	// 	EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
+	// 	EMIT_BYTE(rt, code,TOY_OPCODE_ADD);
+	// 	EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
 	// 	//TODO: emit the env symbol to store TOP(S) within
 	// }
 	// else if (ast.flag == TOY_AST_FLAG_SUBTRACT_ASSIGN) {
-	// 	EMIT_BYTE(rt, TOY_OPCODE_SUBTRACT);
-	// 	EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
+	// 	EMIT_BYTE(rt, code,TOY_OPCODE_SUBTRACT);
+	// 	EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
 	// 	//TODO: emit the env symbol to store TOP(S) within
 	// }
 	// else if (ast.flag == TOY_AST_FLAG_MULTIPLY_ASSIGN) {
-	// 	EMIT_BYTE(rt, TOY_OPCODE_MULTIPLY);
-	// 	EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
+	// 	EMIT_BYTE(rt, code,TOY_OPCODE_MULTIPLY);
+	// 	EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
 	// 	//TODO: emit the env symbol to store TOP(S) within
 	// }
 	// else if (ast.flag == TOY_AST_FLAG_DIVIDE_ASSIGN) {
-	// 	EMIT_BYTE(rt, TOY_OPCODE_DIVIDE);
-	// 	EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
+	// 	EMIT_BYTE(rt, code,TOY_OPCODE_DIVIDE);
+	// 	EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
 	// 	//TODO: emit the env symbol to store TOP(S) within
 	// }
 	// else if (ast.flag == TOY_AST_FLAG_MODULO_ASSIGN) {
-	// 	EMIT_BYTE(rt, TOY_OPCODE_MODULO);
-	// 	EMIT_BYTE(rt, TOY_OPCODE_ASSIGN);
+	// 	EMIT_BYTE(rt, code,TOY_OPCODE_MODULO);
+	// 	EMIT_BYTE(rt, code,TOY_OPCODE_ASSIGN);
 	// 	//TODO: emit the env symbol to store TOP(S) within
 	// }

 	else if (ast.flag == TOY_AST_FLAG_COMPARE_EQUAL) {
-		EMIT_BYTE(rt, TOY_OPCODE_COMPARE_EQUAL);
+		EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_EQUAL);
 	}
 	else if (ast.flag == TOY_AST_FLAG_COMPARE_NOT) {
-		EMIT_BYTE(rt, TOY_OPCODE_COMPARE_EQUAL);
-		EMIT_BYTE(rt, TOY_OPCODE_NEGATE); //squeezed into one word
-		EMIT_BYTE(rt, 0);
-		EMIT_BYTE(rt, 0);
+		EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_EQUAL);
+		EMIT_BYTE(rt, code,TOY_OPCODE_NEGATE); //squeezed into one word
+		EMIT_BYTE(rt, code,0);
+		EMIT_BYTE(rt, code,0);

 		return;
 	}
 	else if (ast.flag == TOY_AST_FLAG_COMPARE_LESS) {
-		EMIT_BYTE(rt, TOY_OPCODE_COMPARE_LESS);
+		EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_LESS);
 	}
 	else if (ast.flag == TOY_AST_FLAG_COMPARE_LESS_EQUAL) {
-		EMIT_BYTE(rt, TOY_OPCODE_COMPARE_LESS_EQUAL);
+		EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_LESS_EQUAL);
 	}
 	else if (ast.flag == TOY_AST_FLAG_COMPARE_GREATER) {
-		EMIT_BYTE(rt, TOY_OPCODE_COMPARE_GREATER);
+		EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_GREATER);
 	}
 	else if (ast.flag == TOY_AST_FLAG_COMPARE_GREATER_EQUAL) {
-		EMIT_BYTE(rt, TOY_OPCODE_COMPARE_GREATER_EQUAL);
+		EMIT_BYTE(rt, code,TOY_OPCODE_COMPARE_GREATER_EQUAL);
 	}

 	else if (ast.flag == TOY_AST_FLAG_AND) {
-		EMIT_BYTE(rt, TOY_OPCODE_AND);
+		EMIT_BYTE(rt, code,TOY_OPCODE_AND);
 	}
 	else if (ast.flag == TOY_AST_FLAG_OR) {
-		EMIT_BYTE(rt, TOY_OPCODE_OR);
+		EMIT_BYTE(rt, code,TOY_OPCODE_OR);
+	}
+	else if (ast.flag == TOY_AST_FLAG_CONCAT) {
+		EMIT_BYTE(rt, code, TOY_OPCODE_CONCAT);
 	}
 	else {
 		fprintf(stderr, TOY_CC_ERROR "ERROR: Invalid AST binary flag found\n" TOY_CC_RESET);
@@ -198,9 +245,9 @@ static void writeInstructionBinary(Toy_Routine** rt, Toy_AstBinary ast) {
 	}

 	//4-byte alignment (covers most cases)
-	EMIT_BYTE(rt, 0);
-	EMIT_BYTE(rt, 0);
-	EMIT_BYTE(rt, 0);
+	EMIT_BYTE(rt, code,0);
+	EMIT_BYTE(rt, code,0);
+	EMIT_BYTE(rt, code,0);
 }

 static void writeInstructionPrint(Toy_Routine** rt, Toy_AstPrint ast) {
@@ -208,12 +255,12 @@ static void writeInstructionPrint(Toy_Routine** rt, Toy_AstPrint ast) {
 	writeRoutineCode(rt, ast.child);

 	//output the print opcode
-	EMIT_BYTE(rt, TOY_OPCODE_PRINT);
+	EMIT_BYTE(rt, code,TOY_OPCODE_PRINT);

 	//4-byte alignment
-	EMIT_BYTE(rt, 0);
-	EMIT_BYTE(rt, 0);
-	EMIT_BYTE(rt, 0);
+	EMIT_BYTE(rt, code,0);
+	EMIT_BYTE(rt, code,0);
+	EMIT_BYTE(rt, code,0);
 }

 //routine structure
@@ -274,39 +321,31 @@ static void writeRoutineCode(Toy_Routine** rt, Toy_Ast* ast) {
 	}
 }

-// static void writeRoutineJumps(Toy_Routine* rt) {
-// 	//
-// }
-
-// static void writeRoutineData(Toy_Routine* rt) {
-// 	//
-// }
-
 static void* writeRoutine(Toy_Routine* rt, Toy_Ast* ast) {
 	//build the routine's parts
 	//TODO: param
 	//code
 	writeRoutineCode(&rt, ast);
-	EMIT_BYTE(&rt, TOY_OPCODE_RETURN); //temp terminator
-	EMIT_BYTE(&rt, 0); //4-byte alignment
-	EMIT_BYTE(&rt, 0);
-	EMIT_BYTE(&rt, 0);
-	//TODO: jumps
-	//TODO: data
+	EMIT_BYTE(&rt, code, TOY_OPCODE_RETURN); //temp terminator
+	EMIT_BYTE(&rt, code, 0); //4-byte alignment
+	EMIT_BYTE(&rt, code, 0);
+	EMIT_BYTE(&rt, code, 0);

 	//write the header and combine the parts
 	void* buffer = NULL;
 	unsigned int capacity = 0, count = 0;
-	// int paramAddr = 0, codeAddr = 0, jumpsAddr = 0, dataAddr = 0, subsAddr = 0;
+	// int paramAddr = 0, codeAddr = 0, subsAddr = 0;
 	int codeAddr = 0;
+	int jumpsAddr = 0;
+	int dataAddr = 0;

 	emitInt(&buffer, &capacity, &count, 0); //total size (overwritten later)
-	emitInt(&buffer, &capacity, &count, rt->paramCount); //param count
-	emitInt(&buffer, &capacity, &count, rt->jumpsCount); //jumps count
-	emitInt(&buffer, &capacity, &count, rt->dataCount); //data count
-	emitInt(&buffer, &capacity, &count, rt->subsCount); //routine count
+	emitInt(&buffer, &capacity, &count, rt->paramCount); //param size
+	emitInt(&buffer, &capacity, &count, rt->jumpsCount); //jumps size
+	emitInt(&buffer, &capacity, &count, rt->dataCount); //data size
+	emitInt(&buffer, &capacity, &count, rt->subsCount); //routine size

-	//generate blank spaces, cache their positions in the []Addr variables
+	//generate blank spaces, cache their positions in the *Addr variables (for storing the start positions)
 	if (rt->paramCount > 0) {
 		// paramAddr = count;
 		emitInt((void**)&buffer, &capacity, &count, 0); //params
@@ -316,11 +355,11 @@ static void* writeRoutine(Toy_Routine* rt, Toy_Ast* ast) {
 		emitInt((void**)&buffer, &capacity, &count, 0); //code
 	}
 	if (rt->jumpsCount > 0) {
-		// jumpsAddr = count;
+		jumpsAddr = count;
 		emitInt((void**)&buffer, &capacity, &count, 0); //jumps
 	}
 	if (rt->dataCount > 0) {
-		// dataAddr = count;
+		dataAddr = count;
 		emitInt((void**)&buffer, &capacity, &count, 0); //data
 	}
 	if (rt->subsCount > 0) {
@@ -337,6 +376,22 @@ static void* writeRoutine(Toy_Routine* rt, Toy_Ast* ast) {
 		count += rt->codeCount;
 	}

+	if (rt->jumpsCount > 0) {
+		expand(&buffer, &capacity, &count, rt->jumpsCount);
+		memcpy((buffer + count), rt->jumps, rt->jumpsCount);
+
+		*((int*)(buffer + jumpsAddr)) = count;
+		count += rt->jumpsCount;
+	}
+
+	if (rt->dataCount > 0) {
+		expand(&buffer, &capacity, &count, rt->dataCount);
+		memcpy((buffer + count), rt->data, rt->dataCount);
+
+		*((int*)(buffer + dataAddr)) = count;
+		count += rt->dataCount;
+	}
+
 	//finally, record the total size within the header, and return the result
 	*((int*)buffer) = count;

@@ -17,7 +17,7 @@ typedef struct Toy_Routine {
 	unsigned int jumpsCapacity;
 	unsigned int jumpsCount;

-	unsigned char* data; //{type,val} tuples of data
+	unsigned char* data; //data for longer stuff
 	unsigned int dataCapacity;
 	unsigned int dataCount;

@@ -42,13 +42,18 @@ Toy_String* Toy_createString(Toy_Bucket** bucketHandle, const char* cstring) {
 }

 Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, int length) {
+	if (length > TOY_STRING_MAX_LENGTH) {
+		fprintf(stderr, TOY_CC_ERROR "ERROR: Can't create a string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
+		exit(-1);
+	}
+
 	Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1); //TODO: compensate for partitioning more space than bucket capacity

 	ret->type = TOY_STRING_LEAF;
 	ret->length = length;
 	ret->refCount = 1;
 	ret->cachedHash = 0; //don't calc until needed
-	memcpy(ret->as.leaf.data, cstring, length);
+	memcpy(ret->as.leaf.data, cstring, length + 1);
 	ret->as.leaf.data[length] = '\0';

 	return ret;
@@ -57,13 +62,18 @@ Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstrin
 TOY_API Toy_String* Toy_createNameString(Toy_Bucket** bucketHandle, const char* cname) {
 	int length = strlen(cname);

+	if (length > TOY_STRING_MAX_LENGTH) {
+		fprintf(stderr, TOY_CC_ERROR "ERROR: Can't create a name string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
+		exit(-1);
+	}
+
 	Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1); //TODO: compensate for partitioning more space than bucket capacity

 	ret->type = TOY_STRING_NAME;
 	ret->length = length;
 	ret->refCount = 1;
 	ret->cachedHash = 0; //don't calc until needed
-	memcpy(ret->as.name.data, cname, length);
+	memcpy(ret->as.name.data, cname, length + 1);
 	ret->as.name.data[length] = '\0';

 	return ret;
@@ -98,7 +108,7 @@ Toy_String* Toy_deepCopyString(Toy_Bucket** bucketHandle, Toy_String* str) {
 		ret->length = str->length;
 		ret->refCount = 1;
 		ret->cachedHash = str->cachedHash;
-		memcpy(ret->as.name.data, str->as.name.data, str->length);
+		memcpy(ret->as.name.data, str->as.name.data, str->length + 1);
 		ret->as.name.data[ret->length] = '\0';
 	}

@@ -116,6 +126,11 @@ Toy_String* Toy_concatStrings(Toy_Bucket** bucketHandle, Toy_String* left, Toy_S
 		exit(-1);
 	}

+	if (left->length + right->length > TOY_STRING_MAX_LENGTH) {
+		fprintf(stderr, TOY_CC_ERROR "ERROR: Can't concat a string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
+		exit(-1);
+	}
+
 	Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String));

 	ret->type = TOY_STRING_NODE;
@@ -5,6 +5,8 @@
 #include "toy_bucket.h"
 #include "toy_value.h"

+#define TOY_STRING_MAX_LENGTH 1000
+
 //rope pattern
 typedef struct Toy_String {             //32 | 64 BITNESS
 	enum Toy_StringType {
@@ -15,23 +15,25 @@ typedef enum Toy_ValueType {
 	TOY_VALUE_DICTIONARY,
 	TOY_VALUE_FUNCTION,
 	TOY_VALUE_OPAQUE,
+
+	//TODO: type, any
 } Toy_ValueType;

 //8 bytes in size
-typedef struct Toy_Value {      //32 | 64 BITNESS
+typedef struct Toy_Value {          //32 | 64 BITNESS
 	union {
-		bool boolean;           //1  | 1
-		int integer;            //4  | 4
-		float number;           //4  | 4
-		struct Toy_String* string;     //4  | 8
+		bool boolean;               //1  | 1
+		int integer;                //4  | 4
+		float number;               //4  | 4
+		struct Toy_String* string;  //4  | 8
 		//TODO: arrays
 		//TODO: dictonaries
 		//TODO: functions
 		//TODO: opaque
-	} as;                       //4  | 8
+	} as;                           //4  | 8

-	Toy_ValueType type;         //4  | 4
-} Toy_Value;                    //8  | 12
+	Toy_ValueType type;             //4  | 4
+} Toy_Value;                        //8  | 16

 #define TOY_VALUE_IS_NULL(value)				((value).type == TOY_VALUE_NULL)
 #define TOY_VALUE_IS_BOOLEAN(value)				((value).type == TOY_VALUE_BOOLEAN)
@@ -4,6 +4,7 @@
 #include "toy_print.h"
 #include "toy_opcodes.h"
 #include "toy_value.h"
+#include "toy_string.h"

 #include <stdio.h>
 #include <stdlib.h>
@@ -64,8 +65,17 @@ static void processRead(Toy_VM* vm) {
 		}

 		case TOY_VALUE_STRING: {
-			//
-			// break;
+			fix_alignment(vm);
+			//grab the jump as an integer
+			unsigned int jump = *(unsigned int*)(vm->routine + vm->jumpsAddr + READ_INT(vm));
+
+			//jumps are relative to the data address
+			char* cstring = (char*)(vm->routine + vm->dataAddr + jump);
+
+			//build a string from the data section
+			value = TOY_VALUE_FROM_STRING(Toy_createString(&vm->stringBucket, cstring));
+
+			break;
 		}

 		case TOY_VALUE_ARRAY: {
@@ -264,7 +274,24 @@ static void processPrint(Toy_VM* vm) {
 			break;
 		}

-		case TOY_VALUE_STRING: //TODO: decide on how long strings, etc. live for in memory
+		case TOY_VALUE_STRING: {
+			Toy_String* str = TOY_VALUE_AS_STRING(value);
+
+			//TODO: decide on how long strings, etc. live for in memory
+			if (str->type == TOY_STRING_NODE) {
+				char* buffer = Toy_getStringRawBuffer(str);
+				Toy_print(buffer);
+				free(buffer);
+			}
+			else if (str->type == TOY_STRING_LEAF) {
+				Toy_print(str->as.leaf.data);
+			}
+			else if (str->type == TOY_STRING_NAME) {
+				Toy_print(str->as.name.data); //should this be a thing?
+			}
+			break;
+		}
+
 		case TOY_VALUE_ARRAY:
 		case TOY_VALUE_DICTIONARY:
 		case TOY_VALUE_FUNCTION:
@@ -274,6 +301,25 @@ static void processPrint(Toy_VM* vm) {
 	}
 }

+//pop two operands and push their concatenation; if either one is not a string, report an error and push nothing
+static void processConcat(Toy_VM* vm) {
+	Toy_Value right = Toy_popStack(&vm->stack); //the right operand is on top of the stack
+	Toy_Value left = Toy_popStack(&vm->stack);
+
+	if (!TOY_VALUE_IS_STRING(left)) {
+		Toy_error("Failed to concatenate a value that is not a string");
+		return;
+	}
+
+	if (!TOY_VALUE_IS_STRING(right)) { //BUGFIX: this guard used to test 'left' a second time, letting a non-string 'right' through
+		Toy_error("Failed to concatenate a value that is not a string");
+		return;
+	}
+
+	Toy_String* result = Toy_concatStrings(&vm->stringBucket, TOY_VALUE_AS_STRING(left), TOY_VALUE_AS_STRING(right)); //both operands are strings here
+	Toy_pushStack(&vm->stack, TOY_VALUE_FROM_STRING(result));
+}
+
 static void process(Toy_VM* vm) {
 	while(true) {
 		Toy_OpcodeType opcode = READ_BYTE(vm);
@@ -320,6 +366,10 @@ static void process(Toy_VM* vm) {
 				processPrint(vm);
 				break;

+			case TOY_OPCODE_CONCAT:
+				processConcat(vm);
+				break;
+
 			//not yet implemented
 			case TOY_OPCODE_LOAD:
 			case TOY_OPCODE_LOAD_LONG:
@@ -373,32 +423,34 @@ void Toy_bindVMToRoutine(Toy_VM* vm, unsigned char* routine) {

 	//read the header metadata
 	vm->routineSize = READ_UNSIGNED_INT(vm);
-	vm->paramCount = READ_UNSIGNED_INT(vm);
-	vm->jumpsCount = READ_UNSIGNED_INT(vm);
-	vm->dataCount = READ_UNSIGNED_INT(vm);
-	vm->subsCount = READ_UNSIGNED_INT(vm);
+	vm->paramSize = READ_UNSIGNED_INT(vm);
+	vm->jumpsSize = READ_UNSIGNED_INT(vm);
+	vm->dataSize = READ_UNSIGNED_INT(vm);
+	vm->subsSize = READ_UNSIGNED_INT(vm);

 	//read the header addresses
-	if (vm->paramCount > 0) {
+	if (vm->paramSize > 0) {
 		vm->paramAddr = READ_UNSIGNED_INT(vm);
 	}

 	vm->codeAddr = READ_UNSIGNED_INT(vm); //required

-	if (vm->jumpsCount > 0) {
+	if (vm->jumpsSize > 0) {
 		vm->jumpsAddr = READ_UNSIGNED_INT(vm);
 	}

-	if (vm->dataCount > 0) {
+	if (vm->dataSize > 0) {
 		vm->dataAddr = READ_UNSIGNED_INT(vm);
 	}

-	if (vm->subsCount > 0) {
+	if (vm->subsSize > 0) {
 		vm->subsAddr = READ_UNSIGNED_INT(vm);
 	}

-	//preallocate the scope & stack
+	//allocate the stack, scope, and memory
 	vm->stack = Toy_allocateStack();
+	//TODO: scope
+	vm->stringBucket = Toy_allocateBucket(TOY_BUCKET_IDEAL);
 }

 void Toy_runVM(Toy_VM* vm) {
@@ -412,13 +464,12 @@ void Toy_runVM(Toy_VM* vm) {
 }

 void Toy_freeVM(Toy_VM* vm) {
-	//clear the stack
+	//clear the stack, scope and memory
 	Toy_freeStack(vm->stack);
-
 	//TODO: clear the scope
+	Toy_freeBucket(&vm->stringBucket);

 	//free the bytecode
-
 	free(vm->bc);
 	Toy_resetVM(vm);
 }
@@ -429,10 +480,10 @@ void Toy_resetVM(Toy_VM* vm) {
 	vm->routine = NULL;
 	vm->routineSize = 0;

-	vm->paramCount = 0;
-	vm->jumpsCount = 0;
-	vm->dataCount = 0;
-	vm->subsCount = 0;
+	vm->paramSize = 0;
+	vm->jumpsSize = 0;
+	vm->dataSize = 0;
+	vm->subsSize = 0;

 	vm->paramAddr = 0;
 	vm->codeAddr = 0;
@@ -3,6 +3,7 @@
 #include "toy_common.h"

 #include "toy_stack.h"
+#include "toy_bucket.h"

 typedef struct Toy_VM {
 	//hold the raw bytecode
@@ -12,10 +13,10 @@ typedef struct Toy_VM {
 	unsigned char* routine;
 	unsigned int routineSize;

-	unsigned int paramCount;
-	unsigned int jumpsCount;
-	unsigned int dataCount;
-	unsigned int subsCount;
+	unsigned int paramSize;
+	unsigned int jumpsSize;
+	unsigned int dataSize;
+	unsigned int subsSize;

 	unsigned int paramAddr;
 	unsigned int codeAddr;
@@ -30,6 +31,9 @@ typedef struct Toy_VM {

 	//stack - immediate-level values only
 	Toy_Stack* stack;
+
+	//easy access to memory
+	Toy_Bucket* stringBucket;
 } Toy_VM;

 TOY_API void Toy_bindVM(Toy_VM* vm, unsigned char* bytecode); //process the version data