#include "toy_parser.h"

#include "toy_memory.h"
#include "toy_literal.h"
#include "toy_opcodes.h"
#include "toy_console_colors.h"

#include <stdio.h>

//utility functions

//Report a parse error for `token` on stderr and flag the parser.
//Sets both parser->error and parser->panic; while panic is already set,
//further reports are suppressed so one mistake doesn't cascade.
static void error(Toy_Parser* parser, Toy_Token token, const char* message) {
	//keep going while panicking
	if (parser->panic) {
		return;
	}

	fprintf(stderr, TOY_CC_ERROR "[Line %d] Error", token.line);

	//check type
	if (token.type == TOY_TOKEN_EOF) {
		fprintf(stderr, " at end");
	}
	else {
		fprintf(stderr, " at '%.*s'", token.length, token.lexeme);
	}

	//finally
	fprintf(stderr, ": %s\n" TOY_CC_RESET, message);
	parser->error = true;
	parser->panic = true;
}

//Shift the token window: current becomes previous, and a fresh token is
//scanned into current. A lexer error token is reported immediately.
static void advance(Toy_Parser* parser) {
	parser->previous = parser->current;
	parser->current = Toy_private_scanLexer(parser->lexer);

	if (parser->current.type == TOY_TOKEN_ERROR) {
		error(parser, parser->current, "Toy_Lexer error");
	}
}

//Consume the current token if (and only if) it has the given type.
//Returns true when a token was consumed.
static bool match(Toy_Parser* parser, Toy_TokenType tokenType) {
	if (parser->current.type == tokenType) {
		advance(parser);
		return true;
	}
	return false;
}

//Like match(), but a mismatch is an error reported with `msg`;
//on mismatch the current token is NOT consumed.
static void consume(Toy_Parser* parser, Toy_TokenType tokenType, const char* msg) {
	if (parser->current.type != tokenType) {
		error(parser, parser->current, msg);
		return;
	}

	advance(parser);
}

//Skip tokens until one that can begin a statement (or EOF) is reached.
//Panic mode is cleared only when such a token is found.
static void synchronize(Toy_Parser* parser) {
#ifndef TOY_EXPORT
	if (Toy_commandLine.verbose) {
		fprintf(stderr, TOY_CC_ERROR "Synchronizing input\n" TOY_CC_RESET);
	}
#endif

	while (parser->current.type != TOY_TOKEN_EOF) {
		switch(parser->current.type) {
			//these tokens can start a line
			case TOY_TOKEN_ASSERT:
			case TOY_TOKEN_BREAK:
			case TOY_TOKEN_CLASS:
			case TOY_TOKEN_CONTINUE:
			case TOY_TOKEN_DO:
			case TOY_TOKEN_EXPORT:
			case TOY_TOKEN_FOR:
			case TOY_TOKEN_FOREACH:
			case TOY_TOKEN_IF:
			case TOY_TOKEN_IMPORT:
			case TOY_TOKEN_PRINT:
			case TOY_TOKEN_RETURN:
			case TOY_TOKEN_VAR:
			case TOY_TOKEN_WHILE:
				parser->panic = false;
				return;

			default:
				advance(parser);
		}
	}
}

//the pratt table collates the precedence rules (lowest binding first)
typedef enum {
	PREC_NONE,
	PREC_ASSIGNMENT,
	PREC_TERNARY,
	PREC_OR,
	PREC_AND,
	PREC_COMPARISON,
	PREC_TERM,
	PREC_FACTOR,
	PREC_UNARY,
	PREC_CALL,
	PREC_PRIMARY,
} PrecedenceRule;

//A parse rule reads tokens from the parser, writes its result into nodeHandle,
//and returns an opcode that tells parsePrecedence() how to combine the result.
typedef Toy_Opcode (*ParseFn)(Toy_Parser* parser, Toy_ASTNode** nodeHandle);

typedef struct {
	ParseFn prefix;
	ParseFn infix;
	PrecedenceRule precedence;
} ParseRule;

//no static!
ParseRule parseRules[];

//forward declarations
static void declaration(Toy_Parser* parser, Toy_ASTNode** nodeHandle);
static void parsePrecedence(Toy_Parser* parser, Toy_ASTNode** nodeHandle, PrecedenceRule rule);
static Toy_Literal readTypeToLiteral(Toy_Parser* parser);
static void varDecl(Toy_Parser* parser, Toy_ASTNode** nodeHandle);

//TODO: resolve the messy order of these
//the expression rules

//Prefix rule for 'astype': reads a type via readTypeToLiteral() and emits it
//as a literal node. Reports an error if what was read is not a type.
static Toy_Opcode asType(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	Toy_Literal literal = readTypeToLiteral(parser);

	if (!TOY_IS_TYPE(literal)) {
		error(parser, parser->previous, "Expected type after 'astype' keyword");
		Toy_freeLiteral(literal);
		return TOY_OP_EOF;
	}

	Toy_emitASTNodeLiteral(nodeHandle, literal);

	Toy_freeLiteral(literal);

	return TOY_OP_EOF;
}

//Prefix rule for 'typeof': parses the operand at call precedence and wraps it
//in a TOY_OP_TYPE_OF unary node.
static Toy_Opcode typeOf(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	Toy_ASTNode* rhs = NULL;
	parsePrecedence(parser, &rhs, PREC_CALL);
	Toy_emitASTNodeUnary(nodeHandle, TOY_OP_TYPE_OF, rhs);
	return TOY_OP_EOF;
}

//Prefix rule for '[': reads either an array `[a, b]` or a dictionary
//`[k:v, ...]` (or the empty dictionary `[:]`) into a compound node.
//On any error nothing is written to nodeHandle and every node built so far is
//released (this relies on Toy_freeASTNode tolerating NULL, as indexAccess()
//in this file already does).
static Toy_Opcode compound(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	int iterations = 0; //count the number of entries iterated over

	//compound nodes to store what is read
	Toy_ASTNode* array = NULL;
	Toy_ASTNode* dictionary = NULL;

	//the entry currently being read; NULL whenever ownership has been handed off
	Toy_ASTNode* left = NULL;
	Toy_ASTNode* right = NULL;

	while (!match(parser, TOY_TOKEN_BRACKET_RIGHT)) {
		//if empty dictionary, there will be a colon between the brackets
		if (iterations == 0 && match(parser, TOY_TOKEN_COLON)) {
			consume(parser, TOY_TOKEN_BRACKET_RIGHT, "Expected ']' at the end of empty dictionary definition");
			//emit an empty dictionary and finish
			Toy_emitASTNodeCompound(&dictionary, TOY_LITERAL_DICTIONARY);
			break;
		}

		if (iterations > 0) {
			consume(parser, TOY_TOKEN_COMMA, "Expected ',' in array or dictionary");
		}

		iterations++;

		//store the left
		parsePrecedence(parser, &left, PREC_PRIMARY);

		if (!left) { //error
			goto failure;
		}

		//detect a dictionary
		if (match(parser, TOY_TOKEN_COLON)) {
			parsePrecedence(parser, &right, PREC_PRIMARY);

			if (!right) { //error
				goto failure;
			}

			//check we ARE defining a dictionary
			if (array) {
				error(parser, parser->previous, "Incorrect detection between array and dictionary");
				goto failure;
			}

			//init the dictionary
			if (!dictionary) {
				Toy_emitASTNodeCompound(&dictionary, TOY_LITERAL_DICTIONARY);
			}

			//grow the node if needed
			if (dictionary->compound.capacity < dictionary->compound.count + 1) {
				int oldCapacity = dictionary->compound.capacity;

				dictionary->compound.capacity = TOY_GROW_CAPACITY(oldCapacity);
				dictionary->compound.nodes = TOY_GROW_ARRAY(Toy_ASTNode, dictionary->compound.nodes, oldCapacity, dictionary->compound.capacity);
			}

			//store the left and right in the node
			Toy_setASTNodePair(&dictionary->compound.nodes[dictionary->compound.count++], left, right);

			//the pair now holds both nodes, so they must not be freed again on a later failure
			left = NULL;
			right = NULL;
		}
		//detect an array
		else {
			//check we ARE defining an array
			if (dictionary) {
				error(parser, parser->current, "Incorrect detection between array and dictionary");
				goto failure;
			}

			//init the array
			if (!array) {
				Toy_emitASTNodeCompound(&array, TOY_LITERAL_ARRAY);
			}

			//grow the node if needed
			if (array->compound.capacity < array->compound.count + 1) {
				int oldCapacity = array->compound.capacity;

				array->compound.capacity = TOY_GROW_CAPACITY(oldCapacity);
				array->compound.nodes = TOY_GROW_ARRAY(Toy_ASTNode, array->compound.nodes, oldCapacity, array->compound.capacity);
			}

			//copy into the array, and manually free the temp node (shallow: the children now live in the array)
			array->compound.nodes[array->compound.count++] = *left;
			TOY_FREE(Toy_ASTNode, left);
			left = NULL;
		}
	}

	//save the result
	if (array) {
		(*nodeHandle) = array;
	}
	else if (dictionary) {
		(*nodeHandle) = dictionary;
	}
	else {
		//both are null, must be an array (because reasons)
		Toy_emitASTNodeCompound(&array, TOY_LITERAL_ARRAY);
		(*nodeHandle) = array;
	}

	//ignored
	return TOY_OP_EOF;

failure:
	//release everything built so far; the caller's handle is left untouched
	Toy_freeASTNode(left);
	Toy_freeASTNode(right);
	Toy_freeASTNode(array);
	Toy_freeASTNode(dictionary);
	return TOY_OP_EOF;
}

//Prefix rule for string literals: unescapes \n, \t, \\ and \" from the
//previous token's lexeme and emits the result as a string literal node.
//Unknown escapes and over-long strings are reported, but a node is still emitted.
static Toy_Opcode string(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	//handle strings
	switch(parser->previous.type) {
		case TOY_TOKEN_LITERAL_STRING: {
			//unescape valid escaped characters
			int strLength = 0;
			char* buffer = TOY_ALLOCATE(char, parser->previous.length);

			for (int i = 0; i < parser->previous.length; i++) {
				if (parser->previous.lexeme[i] != '\\') { //copy normally
					buffer[strLength++] = parser->previous.lexeme[i];
					continue;
				}

				//step onto the escaped character, but never past the end of the lexeme
				if (++i >= parser->previous.length) {
					error(parser, parser->previous, "Incomplete escape sequence at end of string");
					break;
				}

				//unescape based on the character
				switch(parser->previous.lexeme[i]) {
					case 'n':
						buffer[strLength++] = '\n';
					break;

					case 't':
						buffer[strLength++] = '\t';
					break;

					case '\\':
						buffer[strLength++] = '\\';
					break;

					case '"':
						buffer[strLength++] = '"';
					break;

					default: {
						//report the offending character itself (i already points at it)
						char msg[256];
						snprintf(msg, 256, TOY_CC_ERROR "Unrecognized escape character %c in string" TOY_CC_RESET, parser->previous.lexeme[i]);
						error(parser, parser->previous, msg);
					}
				}
			}

			//for length safety
			if (strLength > TOY_MAX_STRING_LENGTH) {
				strLength = TOY_MAX_STRING_LENGTH;
				char msg[256];
				snprintf(msg, 256, TOY_CC_ERROR "Strings can only be a maximum of %d characters long" TOY_CC_RESET, TOY_MAX_STRING_LENGTH);
				error(parser, parser->previous, msg);
			}

			Toy_Literal literal = TOY_TO_STRING_LITERAL(Toy_createRefStringLength(buffer, strLength));
			TOY_FREE_ARRAY(char, buffer, parser->previous.length);
			Toy_emitASTNodeLiteral(nodeHandle, literal);
			Toy_freeLiteral(literal);
			return TOY_OP_EOF;
		}

		//TODO: interpolated strings

		default:
			error(parser, parser->previous, "Unexpected token passed to string precedence rule");
			return TOY_OP_EOF;
	}
}

//Prefix rule for '(': parses the inner expression, requires the closing ')',
//and wraps the result in a grouping node.
static Toy_Opcode grouping(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	//handle groupings with ()
	switch(parser->previous.type) {
		case TOY_TOKEN_PAREN_LEFT: {
			parsePrecedence(parser, nodeHandle, PREC_TERNARY);
			consume(parser, TOY_TOKEN_PAREN_RIGHT, "Expected ')' at end of grouping");

			//process the result without optimisations
			Toy_emitASTNodeGrouping(nodeHandle);
			return TOY_OP_EOF;
		}

		default:
			error(parser, parser->previous, "Unexpected token passed to grouping precedence rule");
			return TOY_OP_EOF;
	}
}

//Infix rule for the binary operators: consumes the operator, parses the
//right-hand operand into nodeHandle one precedence level above the operator's
//own, and returns the opcode that joins both sides.
static Toy_Opcode binary(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	advance(parser);

	//binary() is an infix rule - so only get the RHS of the operator
	PrecedenceRule operandRule = PREC_NONE;
	Toy_Opcode opcode = TOY_OP_EOF;

	switch(parser->previous.type) {
		//arithmetic
		case TOY_TOKEN_PLUS:            operandRule = PREC_TERM + 1;       opcode = TOY_OP_ADDITION;                  break;
		case TOY_TOKEN_MINUS:           operandRule = PREC_TERM + 1;       opcode = TOY_OP_SUBTRACTION;               break;
		case TOY_TOKEN_MULTIPLY:        operandRule = PREC_FACTOR + 1;     opcode = TOY_OP_MULTIPLICATION;            break;
		case TOY_TOKEN_DIVIDE:          operandRule = PREC_FACTOR + 1;     opcode = TOY_OP_DIVISION;                  break;
		case TOY_TOKEN_MODULO:          operandRule = PREC_FACTOR + 1;     opcode = TOY_OP_MODULO;                    break;

		//assignment
		case TOY_TOKEN_ASSIGN:          operandRule = PREC_ASSIGNMENT + 1; opcode = TOY_OP_VAR_ASSIGN;                break;
		case TOY_TOKEN_PLUS_ASSIGN:     operandRule = PREC_ASSIGNMENT + 1; opcode = TOY_OP_VAR_ADDITION_ASSIGN;       break;
		case TOY_TOKEN_MINUS_ASSIGN:    operandRule = PREC_ASSIGNMENT + 1; opcode = TOY_OP_VAR_SUBTRACTION_ASSIGN;    break;
		case TOY_TOKEN_MULTIPLY_ASSIGN: operandRule = PREC_ASSIGNMENT + 1; opcode = TOY_OP_VAR_MULTIPLICATION_ASSIGN; break;
		case TOY_TOKEN_DIVIDE_ASSIGN:   operandRule = PREC_ASSIGNMENT + 1; opcode = TOY_OP_VAR_DIVISION_ASSIGN;       break;
		case TOY_TOKEN_MODULO_ASSIGN:   operandRule = PREC_ASSIGNMENT + 1; opcode = TOY_OP_VAR_MODULO_ASSIGN;         break;

		//comparison
		case TOY_TOKEN_EQUAL:           operandRule = PREC_COMPARISON + 1; opcode = TOY_OP_COMPARE_EQUAL;             break;
		case TOY_TOKEN_NOT_EQUAL:       operandRule = PREC_COMPARISON + 1; opcode = TOY_OP_COMPARE_NOT_EQUAL;         break;
		case TOY_TOKEN_LESS:            operandRule = PREC_COMPARISON + 1; opcode = TOY_OP_COMPARE_LESS;              break;
		case TOY_TOKEN_LESS_EQUAL:      operandRule = PREC_COMPARISON + 1; opcode = TOY_OP_COMPARE_LESS_EQUAL;        break;
		case TOY_TOKEN_GREATER:         operandRule = PREC_COMPARISON + 1; opcode = TOY_OP_COMPARE_GREATER;           break;
		case TOY_TOKEN_GREATER_EQUAL:   operandRule = PREC_COMPARISON + 1; opcode = TOY_OP_COMPARE_GREATER_EQUAL;     break;

		//logical
		case TOY_TOKEN_AND:             operandRule = PREC_AND + 1;        opcode = TOY_OP_AND;                       break;
		case TOY_TOKEN_OR:              operandRule = PREC_OR + 1;         opcode = TOY_OP_OR;                        break;

		default:
			error(parser, parser->previous, "Unexpected token passed to binary precedence rule");
			return TOY_OP_EOF;
	}

	parsePrecedence(parser, nodeHandle, operandRule);
	return opcode;
}

//Prefix rule for '-' and '!'. Numeric and boolean literal operands are folded
//in place; anything else is wrapped in a NEGATE / INVERT unary node.
static Toy_Opcode unary(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	Toy_ASTNode* tmpNode = NULL;

	if (parser->previous.type == TOY_TOKEN_MINUS) {
		//temp handle to potentially negate values
		parsePrecedence(parser, &tmpNode, PREC_TERM); //can be a literal

		//optimisation: check for negative literals
		if (tmpNode != NULL && tmpNode->type == TOY_AST_NODE_LITERAL && (TOY_IS_INTEGER(tmpNode->atomic.literal) || TOY_IS_FLOAT(tmpNode->atomic.literal))) {
			//negate directly, if int or float
			Toy_Literal lit = tmpNode->atomic.literal;

			if (TOY_IS_INTEGER(lit)) {
				lit = TOY_TO_INTEGER_LITERAL(-TOY_AS_INTEGER(lit));
			}
			else if (TOY_IS_FLOAT(lit)) {
				lit = TOY_TO_FLOAT_LITERAL(-TOY_AS_FLOAT(lit));
			}

			tmpNode->atomic.literal = lit;
			*nodeHandle = tmpNode;

			return TOY_OP_EOF;
		}

		//check for negated boolean errors
		if (tmpNode != NULL && tmpNode->type == TOY_AST_NODE_LITERAL && TOY_IS_BOOLEAN(tmpNode->atomic.literal)) {
			error(parser, parser->previous, "Negative booleans are not allowed");
			Toy_freeASTNode(tmpNode); //the operand is never handed to the caller, so release it here
			return TOY_OP_EOF;
		}

		//actually emit the negation node
		Toy_emitASTNodeUnary(nodeHandle, TOY_OP_NEGATE, tmpNode);
	}

	else if (parser->previous.type == TOY_TOKEN_NOT) {
		//temp handle to potentially negate values
		parsePrecedence(parser, &tmpNode, PREC_CALL); //can be a literal, grouping, fn call, etc.

		//optimisation: check for inverted booleans
		if (tmpNode != NULL && tmpNode->type == TOY_AST_NODE_LITERAL && TOY_IS_BOOLEAN(tmpNode->atomic.literal)) {
			//negate directly, if boolean
			Toy_Literal lit = tmpNode->atomic.literal;

			lit = TOY_TO_BOOLEAN_LITERAL(!TOY_AS_BOOLEAN(lit));

			tmpNode->atomic.literal = lit;
			*nodeHandle = tmpNode;

			return TOY_OP_EOF;
		}

		//actually emit the negation
		Toy_emitASTNodeUnary(nodeHandle, TOY_OP_INVERT, tmpNode);
	}

	else {
		error(parser, parser->previous, "Unexpected token passed to unary precedence rule");
		return TOY_OP_EOF;
	}

	return TOY_OP_EOF;
}

//Copy `length` characters of `lexeme` with every occurrence of `c` removed.
//Returns a NUL-terminated buffer allocated with capacity length + 1; the caller
//must release it with that same capacity.
static char* removeChar(const char* lexeme, int length, char c) {
	int resPos = 0;
	char* result = TOY_ALLOCATE(char, length + 1);

	for (int i = 0; i < length; i++) {
		if (lexeme[i] == c) {
			continue;
		}

		result[resPos++] = lexeme[i];
	}

	result[resPos] = '\0';
	return result;
}

//Prefix rule for the simple literals: null, true, false, integers, floats and
//the 'type' keyword (optionally followed by 'const'). Underscores in numeric
//lexemes are digit separators and are stripped before conversion.
static Toy_Opcode atomic(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	switch(parser->previous.type) {
		case TOY_TOKEN_NULL:
			Toy_emitASTNodeLiteral(nodeHandle, TOY_TO_NULL_LITERAL);
			return TOY_OP_EOF;

		case TOY_TOKEN_LITERAL_TRUE:
			Toy_emitASTNodeLiteral(nodeHandle, TOY_TO_BOOLEAN_LITERAL(true));
			return TOY_OP_EOF;

		case TOY_TOKEN_LITERAL_FALSE:
			Toy_emitASTNodeLiteral(nodeHandle, TOY_TO_BOOLEAN_LITERAL(false));
			return TOY_OP_EOF;

		case TOY_TOKEN_LITERAL_INTEGER: {
			int value = 0;
			//non-const: the buffer is owned here and handed to the free macro below
			char* lexeme = removeChar(parser->previous.lexeme, parser->previous.length, '_');
			sscanf(lexeme, "%d", &value);
			TOY_FREE_ARRAY(char, lexeme, parser->previous.length + 1);
			Toy_emitASTNodeLiteral(nodeHandle, TOY_TO_INTEGER_LITERAL(value));
			return TOY_OP_EOF;
		}

		case TOY_TOKEN_LITERAL_FLOAT: {
			float value = 0;
			char* lexeme = removeChar(parser->previous.lexeme, parser->previous.length, '_');
			sscanf(lexeme, "%f", &value);
			TOY_FREE_ARRAY(char, lexeme, parser->previous.length + 1);
			Toy_emitASTNodeLiteral(nodeHandle, TOY_TO_FLOAT_LITERAL(value));
			return TOY_OP_EOF;
		}

		case TOY_TOKEN_TYPE: {
			if (match(parser, TOY_TOKEN_CONST)) {
				Toy_emitASTNodeLiteral(nodeHandle, TOY_TO_TYPE_LITERAL(TOY_LITERAL_TYPE, true));
			}
			else {
				Toy_emitASTNodeLiteral(nodeHandle, TOY_TO_TYPE_LITERAL(TOY_LITERAL_TYPE, false));
			}

			return TOY_OP_EOF;
		}

		default:
			error(parser, parser->previous, "Unexpected token passed to atomic precedence rule");
			return TOY_OP_EOF;
	}
}

//Prefix rule for identifiers: emits the previous token's lexeme as an
//identifier literal node. Names longer than 256 characters are reported and
//truncated.
static Toy_Opcode identifier(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	//make a copy of the string
	Toy_Token identifierToken = parser->previous;

	if (identifierToken.type != TOY_TOKEN_IDENTIFIER) {
		error(parser, parser->previous, "Expected identifier");
		return TOY_OP_EOF;
	}

	int length = identifierToken.length;

	//for safety
	if (length > 256) {
		length = 256;
		error(parser, parser->previous, "Identifiers can only be a maximum of 256 characters long");
	}

	Toy_Literal identifier = TOY_TO_IDENTIFIER_LITERAL(Toy_createRefStringLength(identifierToken.lexeme, length));
	Toy_emitASTNodeLiteral(nodeHandle, identifier);
	Toy_freeLiteral(identifier);

	return TOY_OP_EOF;
}

//Prefix rule for the castable type keywords (bool, int, float, string):
//emits the matching non-const type literal.
static Toy_Opcode castingPrefix(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	switch(parser->previous.type) {
		case TOY_TOKEN_BOOLEAN: {
			Toy_Literal literal = TOY_TO_TYPE_LITERAL(TOY_LITERAL_BOOLEAN, false);
			Toy_emitASTNodeLiteral(nodeHandle, literal);
			Toy_freeLiteral(literal);
		}
		break;

		case TOY_TOKEN_INTEGER: {
			Toy_Literal literal = TOY_TO_TYPE_LITERAL(TOY_LITERAL_INTEGER, false);
			Toy_emitASTNodeLiteral(nodeHandle, literal);
			Toy_freeLiteral(literal);
		}
		break;

		case TOY_TOKEN_FLOAT: {
			Toy_Literal literal = TOY_TO_TYPE_LITERAL(TOY_LITERAL_FLOAT, false);
			Toy_emitASTNodeLiteral(nodeHandle, literal);
			Toy_freeLiteral(literal);
		}
		break;

		case TOY_TOKEN_STRING: {
			Toy_Literal literal = TOY_TO_TYPE_LITERAL(TOY_LITERAL_STRING, false);
			Toy_emitASTNodeLiteral(nodeHandle, literal);
			Toy_freeLiteral(literal);
		}
		break;

		default:
			error(parser, parser->previous, "Unexpected token passed to casting precedence rule");
			return TOY_OP_EOF;
	}

	return TOY_OP_EOF;
}

//Infix rule registered for identifiers and literals: consumes the value token,
//emits it through the matching prefix rule, and returns TOY_OP_TYPE_CAST.
static Toy_Opcode castingInfix(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	advance(parser);

	//NOTE: using the precedence rules here
	switch(parser->previous.type) {
		case TOY_TOKEN_IDENTIFIER:
			identifier(parser, nodeHandle);
		break;

		case TOY_TOKEN_LITERAL_TRUE:
		case TOY_TOKEN_LITERAL_FALSE:
		case TOY_TOKEN_LITERAL_INTEGER:
		case TOY_TOKEN_LITERAL_FLOAT:
			atomic(parser, nodeHandle);
		break;

		case TOY_TOKEN_LITERAL_STRING:
			string(parser, nodeHandle);
		break;

		default:
			error(parser, parser->previous, "Unexpected token passed to casting infix precedence rule");
			return TOY_OP_EOF;
	}

	return TOY_OP_TYPE_CAST;
}

//TODO: fix these screwy names

//Prefix rule for '++': consumes the identifier that follows and emits a
//prefix-increment node for it.
static Toy_Opcode incrementPrefix(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	advance(parser);

	Toy_ASTNode* tmpNode = NULL;
	identifier(parser, &tmpNode);

	if (!tmpNode) {
		return TOY_OP_EOF;
	}

	Toy_emitASTNodePrefixIncrement(nodeHandle, tmpNode->atomic.literal);

	Toy_freeASTNode(tmpNode);

	return TOY_OP_EOF;
}

//Infix rule for '++': re-reads the identifier in parser->previous, then
//consumes the operator and emits a postfix-increment node.
static Toy_Opcode incrementInfix(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	Toy_ASTNode* tmpNode = NULL;
	identifier(parser, &tmpNode);

	advance(parser);

	if (!tmpNode) {
		return TOY_OP_EOF;
	}

	Toy_emitASTNodePostfixIncrement(nodeHandle, tmpNode->atomic.literal);

	Toy_freeASTNode(tmpNode);

	return TOY_OP_EOF;
}

//Prefix rule for '--': consumes the identifier that follows and emits a
//prefix-decrement node for it.
static Toy_Opcode decrementPrefix(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	advance(parser);

	Toy_ASTNode* tmpNode = NULL;
	identifier(parser, &tmpNode);

	if (!tmpNode) {
		return TOY_OP_EOF;
	}

	Toy_emitASTNodePrefixDecrement(nodeHandle, tmpNode->atomic.literal);

	Toy_freeASTNode(tmpNode);

	return TOY_OP_EOF;
}

//Infix rule for '--': re-reads the identifier in parser->previous, then
//consumes the operator and emits a postfix-decrement node.
static Toy_Opcode decrementInfix(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	Toy_ASTNode* tmpNode = NULL;
	identifier(parser, &tmpNode);

	advance(parser);

	if (!tmpNode) {
		return TOY_OP_EOF;
	}

	Toy_emitASTNodePostfixDecrement(nodeHandle, tmpNode->atomic.literal);

	Toy_freeASTNode(tmpNode);

	return TOY_OP_EOF;
}

//Infix rule for '('. If the token before the paren is a type keyword this is a
//cast `type(value)`; otherwise it is a call, and the comma-separated arguments
//are collected into a fn-collection node wrapped by a fn-call node.
static Toy_Opcode fnCall(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	//wait - is the previous token a type? this should be casting instead
	if (parser->previous.type >= TOY_TOKEN_NULL && parser->previous.type <= TOY_TOKEN_ANY) {
		//casting type
		Toy_ASTNode* lhsNode = NULL;
		castingPrefix(parser, &lhsNode);
		advance(parser);

		//casting value
		Toy_ASTNode* rhsNode = NULL;
		grouping(parser, &rhsNode);

		//emit the cast node
		Toy_emitASTNodeBinary(&lhsNode, rhsNode, TOY_OP_TYPE_CAST);

		//pass it off to the caller
		*nodeHandle = lhsNode;

		return TOY_OP_GROUPING_BEGIN; //dummy value
	}

	advance(parser); //skip the left paren

	//fnCall() is an infix rule - so only get the argument list
	switch(parser->previous.type) {
		case TOY_TOKEN_PAREN_LEFT: {
			Toy_ASTNode* arguments = NULL;
			Toy_emitASTNodeFnCollection(&arguments);

			//if there's arguments
			if (!match(parser, TOY_TOKEN_PAREN_RIGHT)) {
				//read each argument
				do {
					//emit the node to the argument list (grow the node if needed)
					if (arguments->fnCollection.capacity < arguments->fnCollection.count + 1) {
						int oldCapacity = arguments->fnCollection.capacity;

						arguments->fnCollection.capacity = TOY_GROW_CAPACITY(oldCapacity);
						arguments->fnCollection.nodes = TOY_GROW_ARRAY(Toy_ASTNode, arguments->fnCollection.nodes, oldCapacity, arguments->fnCollection.capacity);
					}

					Toy_ASTNode* tmpNode = NULL;
					parsePrecedence(parser, &tmpNode, PREC_TERNARY);

					//BUGFIX
					if (!tmpNode) {
						error(parser, parser->previous, "[internal] No token found in fnCall");
						//the collection is never handed to the caller on this path, so release it
						//NOTE(review): assumes Toy_freeASTNode handles a partially filled fn collection - confirm
						Toy_freeASTNode(arguments);
						return TOY_OP_EOF;
					}

					arguments->fnCollection.nodes[arguments->fnCollection.count++] = *tmpNode;
					TOY_FREE(Toy_ASTNode, tmpNode); //simply free the tmpNode, so you don't free the children
				} while(match(parser, TOY_TOKEN_COMMA));

				consume(parser, TOY_TOKEN_PAREN_RIGHT, "Expected ')' at end of argument list");
			}

			//emit the call
			Toy_emitASTNodeFnCall(nodeHandle, arguments);

			return TOY_OP_FN_CALL;
		}
		break;

		default:
			error(parser, parser->previous, "Unexpected token passed to function call precedence rule");
			return TOY_OP_EOF;
	}

	return TOY_OP_EOF;
}

//Infix rule for '[': parses `val[first : second : third]`. Omitted parts keep
//a blank-index literal; parts that cannot appear (no ':' was seen) are NULL.
static Toy_Opcode indexAccess(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { //TODO: fix indexing signalling
	advance(parser);

	//val[first : second : third]

	Toy_ASTNode* first = NULL;
	Toy_ASTNode* second = NULL;
	Toy_ASTNode* third = NULL;

	//blank literals indicate blank slice indexing
	Toy_emitASTNodeLiteral(&first, TOY_TO_INDEX_BLANK_LITERAL);
	Toy_emitASTNodeLiteral(&second, TOY_TO_INDEX_BLANK_LITERAL);
	Toy_emitASTNodeLiteral(&third, TOY_TO_INDEX_BLANK_LITERAL);

	bool readFirst = false; //pattern matching is bullcrap

	//eat the first
	if (!match(parser, TOY_TOKEN_COLON)) {
		Toy_freeASTNode(first);
		first = NULL;
		parsePrecedence(parser, &first, PREC_TERNARY);
		match(parser, TOY_TOKEN_COLON);
		readFirst = true;
	}

	if (!first) {
		Toy_freeASTNode(first);
		Toy_freeASTNode(second);
		Toy_freeASTNode(third);
		return TOY_OP_EOF;
	}

	if (match(parser, TOY_TOKEN_BRACKET_RIGHT)) {

		if (readFirst) {
			Toy_freeASTNode(second);
			second = NULL;
		}

		Toy_freeASTNode(third);
		third = NULL;

		Toy_emitASTNodeIndex(nodeHandle, first, second, third);
		return TOY_OP_INDEX;
	}

	//eat the second
	if (!match(parser, TOY_TOKEN_COLON)) {
		Toy_freeASTNode(second);
		second = NULL;
		parsePrecedence(parser, &second, PREC_TERNARY);
		match(parser, TOY_TOKEN_COLON);
	}

	if (!second) {
		Toy_freeASTNode(first);
		Toy_freeASTNode(second);
		Toy_freeASTNode(third);
		return TOY_OP_EOF;
	}

	if (match(parser, TOY_TOKEN_BRACKET_RIGHT)) {
		Toy_freeASTNode(third);
		third = NULL;
		Toy_emitASTNodeIndex(nodeHandle, first, second, third);
		return TOY_OP_INDEX;
	}

	//eat the third
	Toy_freeASTNode(third);
	third = NULL;
	parsePrecedence(parser, &third, PREC_TERNARY);

	if (!third) {
		Toy_freeASTNode(first);
		Toy_freeASTNode(second);
		Toy_freeASTNode(third);
		return TOY_OP_EOF;
	}

	Toy_emitASTNodeIndex(nodeHandle, first, second, third);

	consume(parser, TOY_TOKEN_BRACKET_RIGHT, "Expected ']' in index notation");

	return TOY_OP_INDEX;
}

//Infix rule for '?': parses both branches of a ternary. The condition is left
//NULL here; parsePrecedence() fills it in with the left-hand side.
static Toy_Opcode question(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	advance(parser); //for the question mark

	Toy_ASTNode* thenPath = NULL;
	Toy_ASTNode* elsePath = NULL;

	parsePrecedence(parser, &thenPath, PREC_TERNARY);
	consume(parser, TOY_TOKEN_COLON, "Expected ':' in ternary expression");
	parsePrecedence(parser, &elsePath, PREC_TERNARY);

	Toy_emitASTNodeTernary(nodeHandle, NULL, thenPath, elsePath);

	return TOY_OP_TERNARY;
}

//Infix rule for '.': parses the right-hand side at call precedence and
//requires it to carry a right child (i.e. look like a call).
static Toy_Opcode dot(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	advance(parser); //for the dot

	Toy_ASTNode* tmpNode = NULL;
	parsePrecedence(parser, &tmpNode, PREC_CALL);

	if (tmpNode == NULL || tmpNode->binary.right == NULL) {
		error(parser, parser->previous, "Expected function call after dot operator");
		Toy_freeASTNode(tmpNode); //never handed to the caller on this path
		return TOY_OP_EOF;
	}

	(*nodeHandle) = tmpNode;
	return TOY_OP_DOT; //signal that the function name and arguments are in the wrong order
}

ParseRule parseRules[] = { //must match the token types
	//types
	{atomic, NULL, PREC_PRIMARY},// TOKEN_NULL,
	{castingPrefix, NULL, PREC_CALL},// TOKEN_BOOLEAN,
	{castingPrefix, NULL, PREC_CALL},// TOKEN_INTEGER,
	{castingPrefix, NULL, PREC_CALL},// TOKEN_FLOAT,
	{castingPrefix, NULL, PREC_CALL},// TOKEN_STRING,
	{NULL, NULL, PREC_NONE},// TOKEN_ARRAY,
	{NULL, NULL, PREC_NONE},// TOKEN_DICTIONARY,
	{NULL, NULL, PREC_NONE},// TOKEN_FUNCTION,
	{NULL, NULL, PREC_NONE},// TOKEN_OPAQUE,
	{NULL, NULL, PREC_NONE},// TOKEN_ANY,

	//keywords and reserved words
	{NULL, NULL, PREC_NONE},// TOKEN_AS,
	{NULL, NULL, PREC_NONE},// TOKEN_ASSERT,
	{NULL, NULL, PREC_NONE},// TOKEN_BREAK,
	{NULL, NULL, PREC_NONE},// TOKEN_CLASS,
	{NULL, NULL, PREC_NONE},// TOKEN_CONST,
	{NULL, NULL, PREC_NONE},// TOKEN_CONTINUE,
	{NULL, NULL, PREC_NONE},// TOKEN_DO,
	{NULL, NULL, PREC_NONE},// TOKEN_ELSE,
	{NULL, NULL, PREC_NONE},// TOKEN_EXPORT,
	{NULL, NULL, PREC_NONE},// TOKEN_FOR,
	{NULL, NULL, PREC_NONE},// TOKEN_FOREACH,
	{NULL, NULL, PREC_NONE},// TOKEN_IF,
	{NULL, NULL, PREC_NONE},// TOKEN_IMPORT,
	{NULL, NULL, PREC_NONE},// TOKEN_IN,
	{NULL, NULL, PREC_NONE},// TOKEN_OF,
	{NULL, NULL, PREC_NONE},// TOKEN_PRINT,
	{NULL, NULL, PREC_NONE},// TOKEN_RETURN,
	{atomic, NULL, PREC_PRIMARY},// TOKEN_TYPE,
	{asType, NULL, PREC_CALL},// TOKEN_ASTYPE,
	{typeOf, NULL, PREC_CALL},// TOKEN_TYPEOF,
	{NULL, NULL, PREC_NONE},// TOKEN_VAR,
	{NULL, NULL, PREC_NONE},// TOKEN_WHILE,

	//literal values
	{identifier, castingInfix, PREC_PRIMARY},// TOKEN_IDENTIFIER,
	{atomic, castingInfix, PREC_PRIMARY},// TOKEN_LITERAL_TRUE,
	{atomic, castingInfix, PREC_PRIMARY},// TOKEN_LITERAL_FALSE,
	{atomic, castingInfix, PREC_PRIMARY},// TOKEN_LITERAL_INTEGER,
	{atomic, castingInfix, PREC_PRIMARY},// TOKEN_LITERAL_FLOAT,
	{string, castingInfix, PREC_PRIMARY},// TOKEN_LITERAL_STRING,

	//math operators
	{NULL, binary, PREC_TERM},// TOKEN_PLUS,
	{unary, binary, PREC_TERM},// TOKEN_MINUS,
	{NULL, binary, PREC_FACTOR},// TOKEN_MULTIPLY,
	{NULL, binary, PREC_FACTOR},// TOKEN_DIVIDE,
	{NULL, binary, PREC_FACTOR},// TOKEN_MODULO,
	{NULL, binary, PREC_ASSIGNMENT},// TOKEN_PLUS_ASSIGN,
	{NULL, binary, PREC_ASSIGNMENT},// TOKEN_MINUS_ASSIGN,
	{NULL, binary, PREC_ASSIGNMENT},// TOKEN_MULTIPLY_ASSIGN,
	{NULL, binary, PREC_ASSIGNMENT},// TOKEN_DIVIDE_ASSIGN,
	{NULL, binary, PREC_ASSIGNMENT},// TOKEN_MODULO_ASSIGN,
	{incrementPrefix, incrementInfix, PREC_CALL},// TOKEN_PLUS_PLUS,
	{decrementPrefix, decrementInfix, PREC_CALL},// TOKEN_MINUS_MINUS,
	{NULL, binary, PREC_ASSIGNMENT},// TOKEN_ASSIGN,

	//logical operators
	{grouping, fnCall, PREC_CALL},// TOKEN_PAREN_LEFT,
	{NULL, NULL, PREC_NONE},// TOKEN_PAREN_RIGHT,
	{compound, indexAccess, PREC_CALL},// TOKEN_BRACKET_LEFT,
	{NULL, NULL, PREC_NONE},// TOKEN_BRACKET_RIGHT,
	{NULL, NULL, PREC_NONE},// TOKEN_BRACE_LEFT,
	{NULL, NULL, PREC_NONE},// TOKEN_BRACE_RIGHT,
	{unary, NULL, PREC_CALL},// TOKEN_NOT,
	{NULL, binary, PREC_COMPARISON},// TOKEN_NOT_EQUAL,
	{NULL, binary, PREC_COMPARISON},// TOKEN_EQUAL,
	{NULL, binary, PREC_COMPARISON},// TOKEN_LESS,
	{NULL, binary, PREC_COMPARISON},// TOKEN_GREATER,
	{NULL, binary, PREC_COMPARISON},// TOKEN_LESS_EQUAL,
	{NULL, binary, PREC_COMPARISON},// TOKEN_GREATER_EQUAL,
	{NULL, binary, PREC_AND},// TOKEN_AND,
	{NULL, binary, PREC_OR},// TOKEN_OR,

	//other operators
	{NULL, question, PREC_TERNARY}, //TOKEN_QUESTION,
	{NULL, NULL, PREC_NONE},// TOKEN_COLON,
	{NULL, NULL, PREC_NONE},// TOKEN_SEMICOLON,
	{NULL, NULL, PREC_NONE},// TOKEN_COMMA,
	{NULL, dot, PREC_CALL},// TOKEN_DOT,
	{NULL, NULL, PREC_NONE},// TOKEN_PIPE,
	{NULL, NULL, PREC_NONE},// TOKEN_REST,

	//meta tokens
	{NULL, NULL, PREC_NONE},// TOKEN_PASS,
	{NULL, NULL, PREC_NONE},// TOKEN_ERROR,
	{NULL, NULL, PREC_NONE},// TOKEN_EOF,
};

//Look up the parse rule for a token type; the table is indexed directly by the
//token type's value, with no bounds check.
ParseRule* getRule(Toy_TokenType type) {
	return &parseRules[type];
}

//optimisation: constant folding
//Collapses a binary arithmetic/comparison node whose operands are both literals
//into a single literal node. Returns false only when an error was reported
//(division/modulo by zero, modulo on floats, over-long string concatenation);
//returns true when the node was folded or simply left alone.
static bool calcStaticBinaryArithmetic(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	Toy_ASTNode* node = *nodeHandle;

	switch(node->binary.opcode) {
		case TOY_OP_ADDITION:
		case TOY_OP_SUBTRACTION:
		case TOY_OP_MULTIPLICATION:
		case TOY_OP_DIVISION:
		case TOY_OP_MODULO:
		case TOY_OP_COMPARE_EQUAL:
		case TOY_OP_COMPARE_NOT_EQUAL:
		case TOY_OP_COMPARE_LESS:
		case TOY_OP_COMPARE_LESS_EQUAL:
		case TOY_OP_COMPARE_GREATER:
		case TOY_OP_COMPARE_GREATER_EQUAL:
		break;

		default:
			return true;
	}

	//a missing operand means an earlier parse failure; there is nothing to fold
	if (node->binary.left == NULL || node->binary.right == NULL) {
		return true;
	}

	//recurse to the left and right, stopping as soon as a child reports an error
	if (node->binary.left->type == TOY_AST_NODE_BINARY && !calcStaticBinaryArithmetic(parser, &node->binary.left)) {
		return false;
	}

	if (node->binary.right->type == TOY_AST_NODE_BINARY && !calcStaticBinaryArithmetic(parser, &node->binary.right)) {
		return false;
	}

	//make sure left and right are both literals
	if (node->binary.left->type != TOY_AST_NODE_LITERAL || node->binary.right->type != TOY_AST_NODE_LITERAL) {
		return true;
	}

	//evaluate
	Toy_Literal lhs = node->binary.left->atomic.literal;
	Toy_Literal rhs = node->binary.right->atomic.literal;
	Toy_Literal result = TOY_TO_NULL_LITERAL;

	//special case for string concatenation ONLY
	if (TOY_IS_STRING(lhs) && TOY_IS_STRING(rhs) && node->binary.opcode == TOY_OP_ADDITION) {
		//check for overflow
		int totalLength = TOY_AS_STRING(lhs)->length + TOY_AS_STRING(rhs)->length;
		if (totalLength > TOY_MAX_STRING_LENGTH) {
			error(parser, parser->previous, "Can't concatenate these strings, result is too long (error found in constant folding)\n");
			return false;
		}

		//concat the strings; +1 leaves room for snprintf's terminator, so a result of
		//exactly TOY_MAX_STRING_LENGTH characters is not truncated
		char buffer[TOY_MAX_STRING_LENGTH + 1];
		snprintf(buffer, sizeof(buffer), "%s%s", Toy_toCString(TOY_AS_STRING(lhs)), Toy_toCString(TOY_AS_STRING(rhs)));
		result = TOY_TO_STRING_LITERAL(Toy_createRefStringLength(buffer, totalLength));
	}

	//type coercion
	if (TOY_IS_FLOAT(lhs) && TOY_IS_INTEGER(rhs)) {
		rhs = TOY_TO_FLOAT_LITERAL(TOY_AS_INTEGER(rhs));
	}

	if (TOY_IS_INTEGER(lhs) && TOY_IS_FLOAT(rhs)) {
		lhs = TOY_TO_FLOAT_LITERAL(TOY_AS_INTEGER(lhs));
	}

	//maths based on types
	if (TOY_IS_INTEGER(lhs) && TOY_IS_INTEGER(rhs)) {
		switch(node->binary.opcode) {
			case TOY_OP_ADDITION:
				result = TOY_TO_INTEGER_LITERAL( TOY_AS_INTEGER(lhs) + TOY_AS_INTEGER(rhs) );
			break;

			case TOY_OP_SUBTRACTION:
				result = TOY_TO_INTEGER_LITERAL( TOY_AS_INTEGER(lhs) - TOY_AS_INTEGER(rhs) );
			break;

			case TOY_OP_MULTIPLICATION:
				result = TOY_TO_INTEGER_LITERAL( TOY_AS_INTEGER(lhs) * TOY_AS_INTEGER(rhs) );
			break;

			case TOY_OP_DIVISION:
				if (TOY_AS_INTEGER(rhs) == 0) {
					error(parser, parser->previous, "Can't divide by zero (error found in constant folding)");
					return false;
				}
				result = TOY_TO_INTEGER_LITERAL( TOY_AS_INTEGER(lhs) / TOY_AS_INTEGER(rhs) );
			break;

			case TOY_OP_MODULO:
				if (TOY_AS_INTEGER(rhs) == 0) {
					error(parser, parser->previous, "Can't modulo by zero (error found in constant folding)");
					return false;
				}
				result = TOY_TO_INTEGER_LITERAL( TOY_AS_INTEGER(lhs) % TOY_AS_INTEGER(rhs) );
			break;

			case TOY_OP_COMPARE_EQUAL:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_INTEGER(lhs) == TOY_AS_INTEGER(rhs) );
			break;

			case TOY_OP_COMPARE_NOT_EQUAL:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_INTEGER(lhs) != TOY_AS_INTEGER(rhs) );
			break;

			case TOY_OP_COMPARE_LESS:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_INTEGER(lhs) < TOY_AS_INTEGER(rhs) );
			break;

			case TOY_OP_COMPARE_LESS_EQUAL:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_INTEGER(lhs) <= TOY_AS_INTEGER(rhs) );
			break;

			case TOY_OP_COMPARE_GREATER:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_INTEGER(lhs) > TOY_AS_INTEGER(rhs) );
			break;

			case TOY_OP_COMPARE_GREATER_EQUAL:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_INTEGER(lhs) >= TOY_AS_INTEGER(rhs) );
			break;

			default:
				error(parser, parser->previous, "[internal] bad opcode argument passed to calcStaticBinaryArithmetic()");
				return false;
		}
	}

	//catch bad modulo
	if ((TOY_IS_FLOAT(lhs) || TOY_IS_FLOAT(rhs)) && node->binary.opcode == TOY_OP_MODULO) {
		error(parser, parser->previous, "Bad arithmetic argument (modulo on floats not allowed)");
		return false;
	}

	if (TOY_IS_FLOAT(lhs) && TOY_IS_FLOAT(rhs)) {
		switch(node->binary.opcode) {
			case TOY_OP_ADDITION:
				result = TOY_TO_FLOAT_LITERAL( TOY_AS_FLOAT(lhs) + TOY_AS_FLOAT(rhs) );
			break;

			case TOY_OP_SUBTRACTION:
				result = TOY_TO_FLOAT_LITERAL( TOY_AS_FLOAT(lhs) - TOY_AS_FLOAT(rhs) );
			break;

			case TOY_OP_MULTIPLICATION:
				result = TOY_TO_FLOAT_LITERAL( TOY_AS_FLOAT(lhs) * TOY_AS_FLOAT(rhs) );
			break;

			case TOY_OP_DIVISION:
				if (TOY_AS_FLOAT(rhs) == 0) {
					error(parser, parser->previous, "Can't divide by zero (error found in constant folding)");
					return false;
				}
				result = TOY_TO_FLOAT_LITERAL( TOY_AS_FLOAT(lhs) / TOY_AS_FLOAT(rhs) );
			break;

			case TOY_OP_COMPARE_EQUAL:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_FLOAT(lhs) == TOY_AS_FLOAT(rhs) );
			break;

			case TOY_OP_COMPARE_NOT_EQUAL:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_FLOAT(lhs) != TOY_AS_FLOAT(rhs) );
			break;

			case TOY_OP_COMPARE_LESS:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_FLOAT(lhs) < TOY_AS_FLOAT(rhs) );
			break;

			case TOY_OP_COMPARE_LESS_EQUAL:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_FLOAT(lhs) <= TOY_AS_FLOAT(rhs) );
			break;

			case TOY_OP_COMPARE_GREATER:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_FLOAT(lhs) > TOY_AS_FLOAT(rhs) );
			break;

			case TOY_OP_COMPARE_GREATER_EQUAL:
				result = TOY_TO_BOOLEAN_LITERAL( TOY_AS_FLOAT(lhs) >= TOY_AS_FLOAT(rhs) );
			break;

			default:
				error(parser, parser->previous, "[internal] bad opcode argument passed to calcStaticBinaryArithmetic()");
				return false;
		}
	}

	//nothing can be done to optimize
	if (TOY_IS_NULL(result)) {
		return true;
	}

	//optimize by converting this node into a literal node
	Toy_freeASTNode(node->binary.left);
	Toy_freeASTNode(node->binary.right);

	node->type = TOY_AST_NODE_LITERAL;
	node->atomic.literal = result;

	return true;
}

//Walk a tree of binary nodes and turn every FN_CALL into a DOT call, bumping
//the argument count stored on its right child by one.
static void dottify(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { //TODO: remove dot from the compiler entirely
	//a missing child ends the walk
	if ((*nodeHandle) == NULL) {
		return;
	}

	//only if this is chained from a higher binary "fn call"
	if ((*nodeHandle)->type == TOY_AST_NODE_BINARY) {
		if ((*nodeHandle)->binary.opcode == TOY_OP_FN_CALL) {
			(*nodeHandle)->binary.opcode = TOY_OP_DOT;
			(*nodeHandle)->binary.right->fnCall.argumentCount++;
		}
		dottify(parser, &(*nodeHandle)->binary.left);
		dottify(parser, &(*nodeHandle)->binary.right);
	}
}

//The core of the pratt parser: parse one expression whose operators bind at
//least as tightly as `rule`, leaving the resulting tree in *nodeHandle.
//On "Expected expression" / "Expected operator" errors *nodeHandle is set to NULL.
static void parsePrecedence(Toy_Parser* parser, Toy_ASTNode** nodeHandle, PrecedenceRule rule) {
	//every valid expression has a prefix rule
	advance(parser);
	ParseFn prefixRule = getRule(parser->previous.type)->prefix;

	if (prefixRule == NULL) {
		*nodeHandle = NULL; //the handle's value MUST be set to null for error handling
		error(parser, parser->previous, "Expected expression");
		return;
	}

	bool canBeAssigned = rule <= PREC_ASSIGNMENT;
	prefixRule(parser, nodeHandle); //ignore the returned opcode

	//infix rules are left-recursive
	while (rule <= getRule(parser->current.type)->precedence) {
		ParseFn infixRule = getRule(parser->current.type)->infix;

		if (infixRule == NULL) {
			//release what was parsed so far, instead of dropping the only reference to it
			Toy_freeASTNode(*nodeHandle);
			*nodeHandle = NULL; //the handle's value MUST be set to null for error handling
			error(parser, parser->current, "Expected operator");
			return;
		}

		Toy_ASTNode* rhsNode = NULL;
		const Toy_Opcode opcode = infixRule(parser, &rhsNode); //NOTE: infix rule must advance the parser

		//an infix rule returning EOF has produced a complete replacement node
		if (opcode == TOY_OP_EOF) {
			Toy_freeASTNode(*nodeHandle);
			*nodeHandle = rhsNode;
			return; //we're done here
		}

		//BUGFIX: dot-chaining
		if (opcode == TOY_OP_DOT) {
			dottify(parser, &rhsNode);
		}

		//BUGFIX: ternary shorthand
		if (opcode == TOY_OP_TERNARY) {
			rhsNode->ternary.condition = *nodeHandle;
			*nodeHandle = rhsNode;
			continue;
		}

		Toy_emitASTNodeBinary(nodeHandle, rhsNode, opcode);

		//optimise away the constants
		if (!parser->panic && !calcStaticBinaryArithmetic(parser, nodeHandle)) {
			return;
		}
	}

	//if your precedence is below "assignment"
	if (canBeAssigned && match(parser, TOY_TOKEN_ASSIGN)) {
		error(parser, parser->current, "Invalid assignment target");
	}
}

//expressions
static void expression(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	//delegate to the pratt table for expression precedence
	parsePrecedence(parser, nodeHandle, PREC_ASSIGNMENT);
}

//statements

//Parse declarations until the closing '}' and collect them in a block node.
//Stops early (leaving the block as built so far) if the parser is panicking.
static void blockStmt(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	//init
	Toy_emitASTNodeBlock(nodeHandle);

	//sub-scope, compile it and push it up in a node
	while (!match(parser, TOY_TOKEN_BRACE_RIGHT)) {
		if ((*nodeHandle)->block.capacity < (*nodeHandle)->block.count + 1) {
			int oldCapacity = (*nodeHandle)->block.capacity;

			(*nodeHandle)->block.capacity = TOY_GROW_CAPACITY(oldCapacity);
			(*nodeHandle)->block.nodes = TOY_GROW_ARRAY(Toy_ASTNode, (*nodeHandle)->block.nodes, oldCapacity, (*nodeHandle)->block.capacity);
		}

		Toy_ASTNode* tmpNode = NULL;

		//process the grammar rule for this line
		declaration(parser, &tmpNode);

		// Ground floor: perfumery / Stationery and leather goods / Wigs and haberdashery / Kitchenware and food / Going up!
		if (parser->panic) {
			return;
		}

		//BUGFIX: statements no longer require the existing node
		//NOTE(review): tmpNode is dereferenced unconditionally - confirm declaration() always emits a node when not panicking
		((*nodeHandle)->block.nodes[(*nodeHandle)->block.count++]) = *tmpNode;
		TOY_FREE(Toy_ASTNode, tmpNode); //simply free the tmpNode, so you don't free the children
	}
}

//print <expression> ;
static void printStmt(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	//set the node info
	Toy_ASTNode* node = NULL;
	expression(parser, &node);
	Toy_emitASTNodeUnary(nodeHandle, TOY_OP_PRINT, node);

	consume(parser, TOY_TOKEN_SEMICOLON, "Expected ';' at end of print statement");
}

//assert <condition> , <message> ;
static void assertStmt(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	//set the node info
	(*nodeHandle) = TOY_ALLOCATE(Toy_ASTNode, 1); //special case, because I'm lazy
	(*nodeHandle)->type = TOY_AST_NODE_BINARY;
	(*nodeHandle)->binary.opcode = TOY_OP_ASSERT;

	//the node is built by hand, so clear the children: a prefix rule that fails
	//may leave its handle untouched, and it must not hold garbage
	(*nodeHandle)->binary.left = NULL;
	(*nodeHandle)->binary.right = NULL;

	parsePrecedence(parser, &((*nodeHandle)->binary.left), PREC_TERNARY);
	consume(parser, TOY_TOKEN_COMMA, "Expected ',' in assert statement");
	parsePrecedence(parser, &((*nodeHandle)->binary.right), PREC_TERNARY);

	consume(parser, TOY_TOKEN_SEMICOLON, "Expected ';' at end of assert statement");
}

//if ( <condition> ) <declaration> [ else <declaration> ]
static void ifStmt(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	Toy_ASTNode* condition = NULL;
	Toy_ASTNode* thenPath = NULL;
	Toy_ASTNode* elsePath = NULL;

	//read the condition
	consume(parser, TOY_TOKEN_PAREN_LEFT, "Expected '(' at beginning of if clause");
	parsePrecedence(parser, &condition, PREC_TERNARY);

	//read the then path
	consume(parser, TOY_TOKEN_PAREN_RIGHT, "Expected ')' at end of if clause");
	declaration(parser, &thenPath);

	//read the optional else path
	if (match(parser, TOY_TOKEN_ELSE)) {
		declaration(parser, &elsePath);
	}

	Toy_emitASTNodeIf(nodeHandle, condition, thenPath, elsePath);
}

static void whileStmt(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
	Toy_ASTNode* condition = NULL;
	Toy_ASTNode* thenPath = NULL;

	//read the condition
	consume(parser, TOY_TOKEN_PAREN_LEFT, "Expected '(' at beginning of while clause");
	parsePrecedence(parser, &condition, PREC_TERNARY);

	//read the then path
consume(parser, TOY_TOKEN_PAREN_RIGHT, "Expected ')' at end of while clause"); declaration(parser, &thenPath); Toy_emitASTNodeWhile(nodeHandle, condition, thenPath); } static void forStmt(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { Toy_ASTNode* preClause = NULL; Toy_ASTNode* condition = NULL; Toy_ASTNode* postClause = NULL; Toy_ASTNode* thenPath = NULL; //read the clauses consume(parser, TOY_TOKEN_PAREN_LEFT, "Expected '(' at beginning of for clause"); //check the pre-clause if (parser->current.type != TOY_TOKEN_SEMICOLON) { //allow defining variables in the pre-clause if (match(parser, TOY_TOKEN_VAR)) { varDecl(parser, &preClause); } else { parsePrecedence(parser, &preClause, PREC_ASSIGNMENT); } } else { consume(parser, TOY_TOKEN_SEMICOLON, "Expected ';' after empty declaration of for clause"); Toy_emitASTNodePass(&preClause); } //check the condition clause if (parser->current.type != TOY_TOKEN_SEMICOLON) { parsePrecedence(parser, &condition, PREC_TERNARY); consume(parser, TOY_TOKEN_SEMICOLON, "Expected ';' after condition of for clause"); } else { consume(parser, TOY_TOKEN_SEMICOLON, "Expected ';' after empty condition of for clause"); //empty clause defaults to forever Toy_Literal f = TOY_TO_BOOLEAN_LITERAL(true); Toy_emitASTNodeLiteral(&condition, f); } //check the postfix clause if (parser->current.type != TOY_TOKEN_PAREN_RIGHT) { parsePrecedence(parser, &postClause, PREC_ASSIGNMENT); consume(parser, TOY_TOKEN_PAREN_RIGHT, "Expected ')' at end of for clause"); } else { consume(parser, TOY_TOKEN_PAREN_RIGHT, "Expected ')' after empty increment of for clause"); Toy_emitASTNodePass(&postClause); } //read the path declaration(parser, &thenPath); Toy_emitASTNodeFor(nodeHandle, preClause, condition, postClause, thenPath); } static void breakStmt(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { Toy_emitASTNodeBreak(nodeHandle); consume(parser, TOY_TOKEN_SEMICOLON, "Expected ';' at end of break statement"); } static void continueStmt(Toy_Parser* parser, 
Toy_ASTNode** nodeHandle) { Toy_emitASTNodeContinue(nodeHandle); consume(parser, TOY_TOKEN_SEMICOLON, "Expected ';' at end of continue statement"); } static void returnStmt(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { Toy_ASTNode* returnValues = NULL; Toy_emitASTNodeFnCollection(&returnValues); if (!match(parser, TOY_TOKEN_SEMICOLON)) { do { //loop for multiple returns (disabled later in the pipeline) //append the node to the return list (grow the node if needed) if (returnValues->fnCollection.capacity < returnValues->fnCollection.count + 1) { int oldCapacity = returnValues->fnCollection.capacity; returnValues->fnCollection.capacity = TOY_GROW_CAPACITY(oldCapacity); returnValues->fnCollection.nodes = TOY_GROW_ARRAY(Toy_ASTNode, returnValues->fnCollection.nodes, oldCapacity, returnValues->fnCollection.capacity); } Toy_ASTNode* node = NULL; parsePrecedence(parser, &node, PREC_TERNARY); //BUGFIX if (!node) { error(parser, parser->previous, "[internal] No token found in return"); return; } returnValues->fnCollection.nodes[returnValues->fnCollection.count++] = *node; TOY_FREE(Toy_ASTNode, node); //free manually } while(match(parser, TOY_TOKEN_COMMA)); consume(parser, TOY_TOKEN_SEMICOLON, "Expected ';' at end of return statement"); } Toy_emitASTNodeFnReturn(nodeHandle, returnValues); } static void importStmt(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { //read the identifier Toy_ASTNode* node = NULL; advance(parser); identifier(parser, &node); if (node == NULL) { return; } Toy_Literal idn = Toy_copyLiteral(node->atomic.literal); Toy_freeASTNode(node); Toy_Literal alias = TOY_TO_NULL_LITERAL; if (match(parser, TOY_TOKEN_AS)) { Toy_ASTNode* node = NULL; advance(parser); identifier(parser, &node); alias = Toy_copyLiteral(node->atomic.literal); Toy_freeASTNode(node); } Toy_emitASTNodeImport(nodeHandle, idn, alias); consume(parser, TOY_TOKEN_SEMICOLON, "Expected ';' at end of import statement"); Toy_freeLiteral(idn); Toy_freeLiteral(alias); } //precedence functions 
static void expressionStmt(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { //BUGFIX: check for empty statements if (match(parser, TOY_TOKEN_SEMICOLON)) { Toy_emitASTNodeLiteral(nodeHandle, TOY_TO_NULL_LITERAL); return; } Toy_ASTNode* ptr = NULL; expression(parser, &ptr); if (ptr != NULL) { *nodeHandle = ptr; } consume(parser, TOY_TOKEN_SEMICOLON, "Expected ';' at the end of expression statement"); } static void statement(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { //block if (match(parser, TOY_TOKEN_BRACE_LEFT)) { blockStmt(parser, nodeHandle); return; } //print if (match(parser, TOY_TOKEN_PRINT)) { printStmt(parser, nodeHandle); return; } //assert if (match(parser, TOY_TOKEN_ASSERT)) { assertStmt(parser, nodeHandle); return; } //if-then-else if (match(parser, TOY_TOKEN_IF)) { ifStmt(parser, nodeHandle); return; } //while-then if (match(parser, TOY_TOKEN_WHILE)) { whileStmt(parser, nodeHandle); return; } //for-pre-clause-post-then if (match(parser, TOY_TOKEN_FOR)) { forStmt(parser, nodeHandle); return; } //break if (match(parser, TOY_TOKEN_BREAK)) { breakStmt(parser, nodeHandle); return; } //continue if (match(parser, TOY_TOKEN_CONTINUE)) { continueStmt(parser, nodeHandle); return; } //return if (match(parser, TOY_TOKEN_RETURN)) { returnStmt(parser, nodeHandle); return; } //import if (match(parser, TOY_TOKEN_IMPORT)) { importStmt(parser, nodeHandle); return; } //default expressionStmt(parser, nodeHandle); } //declarations and definitions static Toy_Literal readTypeToLiteral(Toy_Parser* parser) { advance(parser); Toy_Literal literal = TOY_TO_TYPE_LITERAL(TOY_LITERAL_NULL, false); switch(parser->previous.type) { case TOY_TOKEN_NULL: //NO-OP break; case TOY_TOKEN_BOOLEAN: TOY_AS_TYPE(literal).typeOf = TOY_LITERAL_BOOLEAN; break; case TOY_TOKEN_INTEGER: TOY_AS_TYPE(literal).typeOf = TOY_LITERAL_INTEGER; break; case TOY_TOKEN_FLOAT: TOY_AS_TYPE(literal).typeOf = TOY_LITERAL_FLOAT; break; case TOY_TOKEN_STRING: TOY_AS_TYPE(literal).typeOf = TOY_LITERAL_STRING; break; 
//array, dictionary - read the sub-types case TOY_TOKEN_BRACKET_LEFT: { Toy_Literal l = readTypeToLiteral(parser); if (match(parser, TOY_TOKEN_COLON)) { Toy_Literal r = readTypeToLiteral(parser); TOY_TYPE_PUSH_SUBTYPE(&literal, l); TOY_TYPE_PUSH_SUBTYPE(&literal, r); TOY_AS_TYPE(literal).typeOf = TOY_LITERAL_DICTIONARY; } else { TOY_TYPE_PUSH_SUBTYPE(&literal, l); TOY_AS_TYPE(literal).typeOf = TOY_LITERAL_ARRAY; } consume(parser, TOY_TOKEN_BRACKET_RIGHT, "Expected ']' at end of type definition"); } break; case TOY_TOKEN_FUNCTION: TOY_AS_TYPE(literal).typeOf = TOY_LITERAL_FUNCTION; break; case TOY_TOKEN_OPAQUE: TOY_AS_TYPE(literal).typeOf = TOY_LITERAL_OPAQUE; break; case TOY_TOKEN_ANY: TOY_AS_TYPE(literal).typeOf = TOY_LITERAL_ANY; break; //wtf case TOY_TOKEN_IDENTIFIER: { //duplicated from identifier() Toy_Token identifierToken = parser->previous; int length = identifierToken.length; //for safety if (length > 256) { length = 256; error(parser, parser->previous, "Identifiers can only be a maximum of 256 characters long"); } literal = TOY_TO_IDENTIFIER_LITERAL(Toy_createRefStringLength(identifierToken.lexeme, length)); } break; //WTF case TOY_TOKEN_TYPE: TOY_AS_TYPE(literal).typeOf = TOY_LITERAL_TYPE; break; default: error(parser, parser->previous, "Bad type signature"); return TOY_TO_NULL_LITERAL; } //const follows the type if (match(parser, TOY_TOKEN_CONST)) { TOY_AS_TYPE(literal).constant = true; } return literal; } static void varDecl(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { //read the identifier consume(parser, TOY_TOKEN_IDENTIFIER, "Expected identifier after var keyword"); Toy_Token identifierToken = parser->previous; int length = identifierToken.length; //for safety if (length > 256) { length = 256; error(parser, parser->previous, "Identifiers can only be a maximum of 256 characters long"); } Toy_Literal identifier = TOY_TO_IDENTIFIER_LITERAL(Toy_createRefStringLength(identifierToken.lexeme, length)); //read the type, if present Toy_Literal 
typeLiteral; if (match(parser, TOY_TOKEN_COLON)) { typeLiteral = readTypeToLiteral(parser); } else { //default to non-const any typeLiteral = TOY_TO_TYPE_LITERAL(TOY_LITERAL_ANY, false); } //variable definition is an expression Toy_ASTNode* expressionNode = NULL; if (match(parser, TOY_TOKEN_ASSIGN)) { expression(parser, &expressionNode); } else { //values are null by default Toy_emitASTNodeLiteral(&expressionNode, TOY_TO_NULL_LITERAL); } //TODO: static type checking? //declare it Toy_emitASTNodeVarDecl(nodeHandle, identifier, typeLiteral, expressionNode); consume(parser, TOY_TOKEN_SEMICOLON, "Expected ';' at end of var declaration"); } static void fnDecl(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { //read the identifier consume(parser, TOY_TOKEN_IDENTIFIER, "Expected identifier after fn keyword"); Toy_Token identifierToken = parser->previous; int length = identifierToken.length; //for safety if (length > 256) { length = 256; error(parser, parser->previous, "Identifiers can only be a maximum of 256 characters long"); } Toy_Literal identifier = TOY_TO_IDENTIFIER_LITERAL(Toy_createRefStringLength(identifierToken.lexeme, length)); //read the parameters and arity consume(parser, TOY_TOKEN_PAREN_LEFT, "Expected '(' after function identifier"); //for holding the array of arguments Toy_ASTNode* argumentNode = NULL; Toy_emitASTNodeFnCollection(&argumentNode); //read args if (!match(parser, TOY_TOKEN_PAREN_RIGHT)) { do { //check for rest parameter if (match(parser, TOY_TOKEN_REST)) { //read the argument identifier consume(parser, TOY_TOKEN_IDENTIFIER, "Expected identifier as function argument"); Toy_Token argIdentifierToken = parser->previous; int length = argIdentifierToken.length; //for safety if (length > 256) { length = 256; error(parser, parser->previous, "Identifiers can only be a maximum of 256 characters long"); } Toy_Literal argIdentifier = TOY_TO_IDENTIFIER_LITERAL(Toy_createRefStringLength(argIdentifierToken.lexeme, length)); //set the type (array of any types) 
Toy_Literal argTypeLiteral = TOY_TO_TYPE_LITERAL(TOY_LITERAL_FUNCTION_ARG_REST, false); //emit the node to the argument list (grow the node if needed) if (argumentNode->fnCollection.capacity < argumentNode->fnCollection.count + 1) { int oldCapacity = argumentNode->fnCollection.capacity; argumentNode->fnCollection.capacity = TOY_GROW_CAPACITY(oldCapacity); argumentNode->fnCollection.nodes = TOY_GROW_ARRAY(Toy_ASTNode, argumentNode->fnCollection.nodes, oldCapacity, argumentNode->fnCollection.capacity); } //store the arg in the array Toy_ASTNode* literalNode = NULL; Toy_emitASTNodeVarDecl(&literalNode, argIdentifier, argTypeLiteral, NULL); argumentNode->fnCollection.nodes[argumentNode->fnCollection.count++] = *literalNode; TOY_FREE(Toy_ASTNode, literalNode); break; } //read the argument identifier consume(parser, TOY_TOKEN_IDENTIFIER, "Expected identifier as function argument"); Toy_Token argIdentifierToken = parser->previous; int length = argIdentifierToken.length; //for safety if (length > 256) { length = 256; error(parser, parser->previous, "Identifiers can only be a maximum of 256 characters long"); } Toy_Literal argIdentifier = TOY_TO_IDENTIFIER_LITERAL(Toy_createRefStringLength(argIdentifierToken.lexeme, length)); //read optional type of the identifier Toy_Literal argTypeLiteral; if (match(parser, TOY_TOKEN_COLON)) { argTypeLiteral = readTypeToLiteral(parser); } else { //default to non-const any argTypeLiteral = TOY_TO_TYPE_LITERAL(TOY_LITERAL_ANY, false); } //emit the node to the argument list (grow the node if needed) if (argumentNode->fnCollection.capacity < argumentNode->fnCollection.count + 1) { int oldCapacity = argumentNode->fnCollection.capacity; argumentNode->fnCollection.capacity = TOY_GROW_CAPACITY(oldCapacity); argumentNode->fnCollection.nodes = TOY_GROW_ARRAY(Toy_ASTNode, argumentNode->fnCollection.nodes, oldCapacity, argumentNode->fnCollection.capacity); } //store the arg in the array Toy_ASTNode* literalNode = NULL; 
Toy_emitASTNodeVarDecl(&literalNode, argIdentifier, argTypeLiteral, NULL); argumentNode->fnCollection.nodes[argumentNode->fnCollection.count++] = *literalNode; TOY_FREE(Toy_ASTNode, literalNode); } while (match(parser, TOY_TOKEN_COMMA)); //if comma is read, continue consume(parser, TOY_TOKEN_PAREN_RIGHT, "Expected ')' after function argument list"); } //read the return types, if present Toy_ASTNode* returnNode = NULL; Toy_emitASTNodeFnCollection(&returnNode); if (match(parser, TOY_TOKEN_COLON)) { do { //append the node to the return list (grow the node if needed) if (returnNode->fnCollection.capacity < returnNode->fnCollection.count + 1) { int oldCapacity = returnNode->fnCollection.capacity; returnNode->fnCollection.capacity = TOY_GROW_CAPACITY(oldCapacity); returnNode->fnCollection.nodes = TOY_GROW_ARRAY(Toy_ASTNode, returnNode->fnCollection.nodes, oldCapacity, returnNode->fnCollection.capacity); } Toy_ASTNode* literalNode = NULL; Toy_emitASTNodeLiteral(&literalNode, readTypeToLiteral(parser)); returnNode->fnCollection.nodes[returnNode->fnCollection.count++] = *literalNode; TOY_FREE(Toy_ASTNode, literalNode); } while(match(parser, TOY_TOKEN_COMMA)); } //read the function body consume(parser, TOY_TOKEN_BRACE_LEFT, "Expected '{' after return list"); Toy_ASTNode* blockNode = NULL; blockStmt(parser, &blockNode); //declare it Toy_emitASTNodeFnDecl(nodeHandle, identifier, argumentNode, returnNode, blockNode); } static void declaration(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { //assume nodeHandle holds a blank node //variable declarations if (match(parser, TOY_TOKEN_VAR)) { varDecl(parser, nodeHandle); } else if (match(parser, TOY_TOKEN_FUNCTION)) { fnDecl(parser, nodeHandle); } else { statement(parser, nodeHandle); } } //exposed functions void Toy_initParser(Toy_Parser* parser, Toy_Lexer* lexer) { parser->lexer = lexer; parser->error = false; parser->panic = false; parser->previous.type = TOY_TOKEN_NULL; parser->current.type = TOY_TOKEN_NULL; advance(parser); } 
//Resets every field of the parser to its idle value; the lexer is only detached, not released.
void Toy_freeParser(Toy_Parser* parser) {
	//token state
	parser->current.type = TOY_TOKEN_NULL;
	parser->previous.type = TOY_TOKEN_NULL;

	//status flags
	parser->panic = false;
	parser->error = false;

	//detach the lexer
	parser->lexer = NULL;
}

//Parses the next top-level declaration and returns it as a heap-allocated node.
//Returns NULL once the end of the input has been reached.
//If the parser panicked, it is re-synchronized and a fresh node of type TOY_AST_NODE_ERROR is returned instead.
Toy_ASTNode* Toy_scanParser(Toy_Parser* parser) {
	//nothing left to read
	if (match(parser, TOY_TOKEN_EOF)) {
		return NULL;
	}

	//process the grammar rule for this line
	Toy_ASTNode* result = NULL;
	declaration(parser, &result);

	//the happy path: hand the node straight back
	if (!parser->panic) {
		return result;
	}

	//recover, then swap whatever was built for an error node
	synchronize(parser);
	Toy_freeASTNode(result);

	Toy_ASTNode* errorNode = TOY_ALLOCATE(Toy_ASTNode, 1);
	errorNode->type = TOY_AST_NODE_ERROR;

	return errorNode;
}