From b00a6838bebde743a9ffa3f02b2225988f4e5048 Mon Sep 17 00:00:00 2001 From: Kayne Ruse Date: Thu, 12 Sep 2024 20:53:34 +1000 Subject: [PATCH] Wrote Toy_Parser with minimal features, tests missing It's too late at night, so I'm packing this up with only a dummy warning message for the tests. I'll keep going tomorrow, hopefully. --- source/toy_ast.c | 36 +-- source/toy_ast.h | 14 +- source/toy_keywords.c | 3 +- source/toy_lexer.h | 2 + source/toy_parser.c | 551 ++++++++++++++++++++++++++++++++++++++ source/toy_parser.h | 21 ++ source/toy_token_types.h | 1 + tests/cases/test_ast.c | 26 +- tests/cases/test_parser.c | 9 + 9 files changed, 628 insertions(+), 35 deletions(-) create mode 100644 tests/cases/test_parser.c diff --git a/source/toy_ast.c b/source/toy_ast.c index 7ee9045..104d63f 100644 --- a/source/toy_ast.c +++ b/source/toy_ast.c @@ -41,28 +41,34 @@ void Toy_private_emitAstValue(Toy_Bucket** bucket, Toy_Ast** handle, Toy_Value v } //TODO: flag range checks -void Toy_private_emitAstUnary(Toy_Bucket** bucket, Toy_Ast** handle, Toy_AstFlag flag, Toy_Ast* child) { - (*handle) = (Toy_Ast*)Toy_partBucket(bucket, sizeof(Toy_Ast)); +void Toy_private_emitAstUnary(Toy_Bucket** bucket, Toy_Ast** handle, Toy_AstFlag flag) { + Toy_Ast* tmp = (Toy_Ast*)Toy_partBucket(bucket, sizeof(Toy_Ast)); - (*handle)->unary.type = TOY_AST_UNARY; - (*handle)->unary.flag = flag; - (*handle)->unary.child = child; + tmp->unary.type = TOY_AST_UNARY; + tmp->unary.flag = flag; + tmp->unary.child = *handle; + + (*handle) = tmp; } -void Toy_private_emitAstBinary(Toy_Bucket** bucket, Toy_Ast** handle, Toy_AstFlag flag, Toy_Ast* left, Toy_Ast* right) { - (*handle) = (Toy_Ast*)Toy_partBucket(bucket, sizeof(Toy_Ast)); +void Toy_private_emitAstBinary(Toy_Bucket** bucket, Toy_Ast** handle, Toy_AstFlag flag, Toy_Ast* right) { + Toy_Ast* tmp = (Toy_Ast*)Toy_partBucket(bucket, sizeof(Toy_Ast)); - (*handle)->binary.type = TOY_AST_BINARY; - (*handle)->binary.flag = flag; - (*handle)->binary.left = left; - 
(*handle)->binary.right = right; + tmp->binary.type = TOY_AST_BINARY; + tmp->binary.flag = flag; + tmp->binary.left = *handle; //left-recursive + tmp->binary.right = right; + + (*handle) = tmp; } -void Toy_private_emitAstGroup(Toy_Bucket** bucket, Toy_Ast** handle, Toy_Ast* child) { - (*handle) = (Toy_Ast*)Toy_partBucket(bucket, sizeof(Toy_Ast)); +void Toy_private_emitAstGroup(Toy_Bucket** bucket, Toy_Ast** handle) { + Toy_Ast* tmp = (Toy_Ast*)Toy_partBucket(bucket, sizeof(Toy_Ast)); - (*handle)->group.type = TOY_AST_GROUP; - (*handle)->group.child = child; + tmp->group.type = TOY_AST_GROUP; + tmp->group.child = (*handle); + + (*handle) = tmp; } void Toy_private_emitAstPass(Toy_Bucket** bucket, Toy_Ast** handle) { diff --git a/source/toy_ast.h b/source/toy_ast.h index cdf8065..d9eec08 100644 --- a/source/toy_ast.h +++ b/source/toy_ast.h @@ -20,12 +20,20 @@ typedef enum Toy_AstType { //flags are handled differently by different types typedef enum Toy_AstFlag { + TOY_AST_FLAG_NONE, + //binary flags TOY_AST_FLAG_ADD, TOY_AST_FLAG_SUBTRACT, TOY_AST_FLAG_MULTIPLY, TOY_AST_FLAG_DIVIDE, TOY_AST_FLAG_MODULO, + TOY_AST_FLAG_ADD_ASSIGN, + TOY_AST_FLAG_SUBTRACT_ASSIGN, + TOY_AST_FLAG_MULTIPLY_ASSIGN, + TOY_AST_FLAG_DIVIDE_ASSIGN, + TOY_AST_FLAG_MODULO_ASSIGN, + TOY_AST_FLAG_ASSIGN, TOY_AST_FLAG_COMPARE_EQUAL, TOY_AST_FLAG_COMPARE_NOT, TOY_AST_FLAG_COMPARE_LESS, @@ -50,9 +58,9 @@ void Toy_private_initAstBlock(Toy_Bucket** bucket, Toy_Ast** handle); void Toy_private_appendAstBlock(Toy_Bucket** bucket, Toy_Ast** handle, Toy_Ast* child); void Toy_private_emitAstValue(Toy_Bucket** bucket, Toy_Ast** handle, Toy_Value value); -void Toy_private_emitAstUnary(Toy_Bucket** bucket, Toy_Ast** handle, Toy_AstFlag flag, Toy_Ast* child); -void Toy_private_emitAstBinary(Toy_Bucket** bucket, Toy_Ast** handle,Toy_AstFlag flag, Toy_Ast* left, Toy_Ast* right); -void Toy_private_emitAstGroup(Toy_Bucket** bucket, Toy_Ast** handle, Toy_Ast* child); +void Toy_private_emitAstUnary(Toy_Bucket** bucket, 
Toy_Ast** handle, Toy_AstFlag flag); +void Toy_private_emitAstBinary(Toy_Bucket** bucket, Toy_Ast** handle,Toy_AstFlag flag, Toy_Ast* right); +void Toy_private_emitAstGroup(Toy_Bucket** bucket, Toy_Ast** handle); void Toy_private_emitAstPass(Toy_Bucket** bucket, Toy_Ast** handle); void Toy_private_emitAstError(Toy_Bucket** bucket, Toy_Ast** handle); diff --git a/source/toy_keywords.c b/source/toy_keywords.c index f185a2e..dec78a4 100644 --- a/source/toy_keywords.c +++ b/source/toy_keywords.c @@ -14,7 +14,7 @@ const Toy_KeywordTypeTuple Toy_private_keywords[] = { {TOY_TOKEN_TYPE_STRING, "string"}, // TOY_TOKEN_TYPE_ARRAY, // TOY_TOKEN_TYPE_DICTIONARY, - {TOY_TOKEN_TYPE_FUNCTION, "fn"}, + // TOY_TOKEN_TYPE_FUNCTION, {TOY_TOKEN_TYPE_OPAQUE, "opaque"}, {TOY_TOKEN_TYPE_ANY, "any"}, @@ -30,6 +30,7 @@ const Toy_KeywordTypeTuple Toy_private_keywords[] = { {TOY_TOKEN_KEYWORD_EXPORT, "export"}, {TOY_TOKEN_KEYWORD_FOR, "for"}, {TOY_TOKEN_KEYWORD_FOREACH, "foreach"}, + {TOY_TOKEN_KEYWORD_FUNCTION, "fn"}, {TOY_TOKEN_KEYWORD_IF, "if"}, {TOY_TOKEN_KEYWORD_IMPORT, "import"}, {TOY_TOKEN_KEYWORD_IN, "in"}, diff --git a/source/toy_lexer.h b/source/toy_lexer.h index 8ca2aca..9fe985b 100644 --- a/source/toy_lexer.h +++ b/source/toy_lexer.h @@ -23,3 +23,5 @@ TOY_API void Toy_bindLexer(Toy_Lexer* lexer, const char* source); TOY_API Toy_Token Toy_private_scanLexer(Toy_Lexer* lexer); TOY_API void Toy_private_printToken(Toy_Token* token); //debugging +//util +#define TOY_BLANK_TOKEN() ((Toy_Token){TOY_TOKEN_NULL, 0, 0, NULL}) diff --git a/source/toy_parser.c b/source/toy_parser.c index e69de29..ab45cb4 100644 --- a/source/toy_parser.c +++ b/source/toy_parser.c @@ -0,0 +1,551 @@ +#include "toy_parser.h" +#include "toy_console_colors.h" + +#include <stdio.h> + +//utilities +static void printError(Toy_Parser* parser, Toy_Token token, const char* errorMsg) { + //keep going while panicking + if (parser->panic) { + return; + } + + fprintf(stderr, TOY_CC_ERROR "[Line %d] Error ", token.line); + + //check 
type + if (token.type == TOY_TOKEN_EOF) { + fprintf(stderr, "at end"); + } + else { + fprintf(stderr, "at '%.*s'", token.length, token.lexeme); + } + + //finally + fprintf(stderr, ": %s\n" TOY_CC_RESET, errorMsg); + parser->error = true; + parser->panic = true; +} + +static void advance(Toy_Parser* parser) { + parser->previous = parser->current; + parser->current = Toy_private_scanLexer(parser->lexer); + + if (parser->current.type == TOY_TOKEN_ERROR) { + printError(parser, parser->current, "Read error"); + } +} + +static bool match(Toy_Parser* parser, Toy_TokenType tokenType) { + if (parser->current.type == tokenType) { + advance(parser); + return true; + } + return false; +} + +static void consume(Toy_Parser* parser, Toy_TokenType tokenType, const char* msg) { + if (parser->current.type != tokenType) { + printError(parser, parser->current, msg); + return; + } + + advance(parser); +} + +static void synchronize(Toy_Parser* parser) { + while (parser->current.type != TOY_TOKEN_EOF) { + switch(parser->current.type) { + //these tokens can start a statement + case TOY_TOKEN_KEYWORD_ASSERT: + case TOY_TOKEN_KEYWORD_BREAK: + case TOY_TOKEN_KEYWORD_CLASS: + case TOY_TOKEN_KEYWORD_CONTINUE: + case TOY_TOKEN_KEYWORD_DO: + case TOY_TOKEN_KEYWORD_EXPORT: + case TOY_TOKEN_KEYWORD_FOR: + case TOY_TOKEN_KEYWORD_FOREACH: + case TOY_TOKEN_KEYWORD_FUNCTION: + case TOY_TOKEN_KEYWORD_IF: + case TOY_TOKEN_KEYWORD_IMPORT: + case TOY_TOKEN_KEYWORD_PRINT: + case TOY_TOKEN_KEYWORD_RETURN: + case TOY_TOKEN_KEYWORD_VAR: + case TOY_TOKEN_KEYWORD_WHILE: + parser->error = true; + parser->panic = false; + return; + + default: + advance(parser); + } + } +} + +//precedence declarations +typedef enum ParsingPrecedence { + PREC_NONE, + PREC_ASSIGNMENT, + PREC_GROUP, + PREC_TERNARY, + PREC_OR, + PREC_AND, + PREC_COMPARISON, + PREC_TERM, + PREC_FACTOR, + PREC_UNARY, + PREC_CALL, + PREC_PRIMARY, +} ParsingPrecedence; + +typedef Toy_AstFlag (*ParsingRule)(Toy_Bucket** bucket, Toy_Parser* parser, 
Toy_Ast** root); + +typedef struct ParsingTuple { + ParsingPrecedence precedence; + ParsingRule prefix; + ParsingRule infix; +} ParsingTuple; + +static void parsePrecedence(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root, ParsingPrecedence precRule); + +static Toy_AstFlag atomic(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root); +static Toy_AstFlag unary(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root); +static Toy_AstFlag binary(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root); +static Toy_AstFlag group(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root); + +//precedence definitions +static ParsingTuple parsingRulesetTable[] = { + {PREC_PRIMARY,atomic,NULL},// TOY_TOKEN_NULL, + + //variable names + {PREC_NONE,NULL,NULL},// TOY_TOKEN_IDENTIFIER, + + //types + {PREC_NONE,NULL,NULL},// TOY_TOKEN_TYPE_TYPE, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_TYPE_BOOLEAN, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_TYPE_INTEGER, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_TYPE_FLOAT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_TYPE_STRING, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_TYPE_ARRAY, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_TYPE_DICTIONARY, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_TYPE_FUNCTION, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_TYPE_OPAQUE, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_TYPE_ANY, + + //keywords and reserved words + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_AS, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_ASSERT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_BREAK, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_CLASS, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_CONST, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_CONTINUE, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_DO, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_ELSE, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_EXPORT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_FOR, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_FOREACH, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_FUNCTION, + {PREC_NONE,NULL,NULL},// 
TOY_TOKEN_KEYWORD_IF, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_IMPORT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_IN, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_OF, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_PRINT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_RETURN, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_TYPEAS, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_TYPEOF, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_VAR, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_KEYWORD_WHILE, + + //literal values + {PREC_PRIMARY,atomic,NULL},// TOY_TOKEN_LITERAL_TRUE, + {PREC_PRIMARY,atomic,NULL},// TOY_TOKEN_LITERAL_FALSE, + {PREC_PRIMARY,atomic,NULL},// TOY_TOKEN_LITERAL_INTEGER, + {PREC_PRIMARY,atomic,NULL},// TOY_TOKEN_LITERAL_FLOAT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_LITERAL_STRING, + + //math operators + {PREC_TERM,NULL,binary},// TOY_TOKEN_OPERATOR_ADD, + {PREC_TERM,unary,binary},// TOY_TOKEN_OPERATOR_SUBTRACT, + {PREC_FACTOR,NULL,binary},// TOY_TOKEN_OPERATOR_MULTIPLY, + {PREC_FACTOR,NULL,binary},// TOY_TOKEN_OPERATOR_DIVIDE, + {PREC_FACTOR,NULL,binary},// TOY_TOKEN_OPERATOR_MODULO, + {PREC_ASSIGNMENT,NULL,binary},// TOY_TOKEN_OPERATOR_ADD_ASSIGN, + {PREC_ASSIGNMENT,NULL,binary},// TOY_TOKEN_OPERATOR_SUBTRACT_ASSIGN, + {PREC_ASSIGNMENT,NULL,binary},// TOY_TOKEN_OPERATOR_MULTIPLY_ASSIGN, + {PREC_ASSIGNMENT,NULL,binary},// TOY_TOKEN_OPERATOR_DIVIDE_ASSIGN, + {PREC_ASSIGNMENT,NULL,binary},// TOY_TOKEN_OPERATOR_MODULO_ASSIGN, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_INCREMENT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_DECREMENT, + {PREC_ASSIGNMENT,NULL,binary},// TOY_TOKEN_OPERATOR_ASSIGN, + + //comparator operators + {PREC_COMPARISON,NULL,binary},// TOY_TOKEN_OPERATOR_COMPARE_EQUAL, + {PREC_COMPARISON,NULL,binary},// TOY_TOKEN_OPERATOR_COMPARE_NOT, + {PREC_COMPARISON,NULL,binary},// TOY_TOKEN_OPERATOR_COMPARE_LESS, + {PREC_COMPARISON,NULL,binary},// TOY_TOKEN_OPERATOR_COMPARE_LESS_EQUAL, + {PREC_COMPARISON,NULL,binary},// TOY_TOKEN_OPERATOR_COMPARE_GREATER, + 
{PREC_COMPARISON,NULL,binary},// TOY_TOKEN_OPERATOR_COMPARE_GREATER_EQUAL, + + //structural operators + {PREC_NONE,group,NULL},// TOY_TOKEN_OPERATOR_PAREN_LEFT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_PAREN_RIGHT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_BRACKET_LEFT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_BRACKET_RIGHT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_BRACE_LEFT, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_BRACE_RIGHT, + + //other operators + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_AND, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_OR, + {PREC_NONE,unary,NULL},// TOY_TOKEN_OPERATOR_NEGATE, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_QUESTION, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_COLON, + + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_CONCAT, // .. + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_REST, // ... + + //unused operators + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_AMPERSAND, // & + {PREC_NONE,NULL,NULL},// TOY_TOKEN_OPERATOR_PIPE, // | + + //meta tokens + {PREC_NONE,NULL,NULL},// TOY_TOKEN_PASS, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_ERROR, + {PREC_NONE,NULL,NULL},// TOY_TOKEN_EOF, +}; + +static Toy_AstFlag atomic(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root) { + switch(parser->previous.type) { + case TOY_TOKEN_NULL: + Toy_private_emitAstValue(bucket, root, TOY_VALUE_TO_NULL()); + return TOY_AST_FLAG_NONE; + + case TOY_TOKEN_LITERAL_TRUE: + Toy_private_emitAstValue(bucket, root, TOY_VALUE_TO_BOOLEAN(true)); + return TOY_AST_FLAG_NONE; + + case TOY_TOKEN_LITERAL_FALSE: + Toy_private_emitAstValue(bucket, root, TOY_VALUE_TO_BOOLEAN(false)); + return TOY_AST_FLAG_NONE; + + case TOY_TOKEN_LITERAL_INTEGER: { + //filter the '_' character + char buffer[parser->previous.length]; + + int i = 0, o = 0; + do { + buffer[i] = parser->previous.lexeme[o]; + if (buffer[i] != '_') i++; + } while (parser->previous.lexeme[o++]); + + int value = 0; + sscanf(buffer, "%d", &value); + Toy_private_emitAstValue(bucket, root, 
TOY_VALUE_TO_INTEGER(value)); + return TOY_AST_FLAG_NONE; + } + + case TOY_TOKEN_LITERAL_FLOAT: { + //filter the '_' character + char buffer[parser->previous.length]; + + int i = 0, o = 0; + do { + buffer[i] = parser->previous.lexeme[o]; + if (buffer[i] != '_') i++; + } while (parser->previous.lexeme[o++]); + + float value = 0; + sscanf(buffer, "%f", &value); + Toy_private_emitAstValue(bucket, root, TOY_VALUE_TO_FLOAT(value)); + return TOY_AST_FLAG_NONE; + } + + default: + printError(parser, parser->previous, "Unexpected token passed to atomic precedence rule"); + Toy_private_emitAstError(bucket, root); //TODO: better error message here? + return TOY_AST_FLAG_NONE; + } +} + +static Toy_AstFlag unary(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root) { + if (parser->previous.type == TOY_TOKEN_OPERATOR_SUBTRACT || parser->previous.type == TOY_TOKEN_OPERATOR_NEGATE) { + //read what to negate + parsePrecedence(bucket, parser, root, PREC_UNARY); + + //actually emit the negation node + Toy_private_emitAstUnary(bucket, root, TOY_AST_FLAG_NEGATE); + } + + else { + printError(parser, parser->previous, "Unexpected token passed to unary precedence rule"); + Toy_private_emitAstError(bucket, root); //TODO: better error message here? 
+ } + + return TOY_AST_FLAG_NONE; +} + +static Toy_AstFlag binary(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root) { + //infix must advance + advance(parser); + + switch(parser->previous.type) { + //arithmetic + case TOY_TOKEN_OPERATOR_ADD: { + parsePrecedence(bucket, parser, root, PREC_TERM + 1); + return TOY_AST_FLAG_ADD; + } + + case TOY_TOKEN_OPERATOR_SUBTRACT: { + parsePrecedence(bucket, parser, root, PREC_TERM + 1); + return TOY_AST_FLAG_SUBTRACT; + } + + case TOY_TOKEN_OPERATOR_MULTIPLY: { + parsePrecedence(bucket, parser, root, PREC_TERM + 1); + return TOY_AST_FLAG_MULTIPLY; + } + + case TOY_TOKEN_OPERATOR_DIVIDE: { + parsePrecedence(bucket, parser, root, PREC_TERM + 1); + return TOY_AST_FLAG_DIVIDE; + } + + case TOY_TOKEN_OPERATOR_MODULO: { + parsePrecedence(bucket, parser, root, PREC_TERM + 1); + return TOY_AST_FLAG_MODULO; + } + + //assignment + case TOY_TOKEN_OPERATOR_ASSIGN: { + parsePrecedence(bucket, parser, root, PREC_ASSIGNMENT + 1); + return TOY_AST_FLAG_ASSIGN; + } + + case TOY_TOKEN_OPERATOR_ADD_ASSIGN: { + parsePrecedence(bucket, parser, root, PREC_ASSIGNMENT + 1); + return TOY_AST_FLAG_ADD_ASSIGN; + } + + case TOY_TOKEN_OPERATOR_SUBTRACT_ASSIGN: { + parsePrecedence(bucket, parser, root, PREC_ASSIGNMENT + 1); + return TOY_AST_FLAG_SUBTRACT_ASSIGN; + } + + case TOY_TOKEN_OPERATOR_MULTIPLY_ASSIGN: { + parsePrecedence(bucket, parser, root, PREC_ASSIGNMENT + 1); + return TOY_AST_FLAG_MULTIPLY_ASSIGN; + } + + case TOY_TOKEN_OPERATOR_DIVIDE_ASSIGN: { + parsePrecedence(bucket, parser, root, PREC_ASSIGNMENT + 1); + return TOY_AST_FLAG_DIVIDE_ASSIGN; + } + + case TOY_TOKEN_OPERATOR_MODULO_ASSIGN: { + parsePrecedence(bucket, parser, root, PREC_ASSIGNMENT + 1); + return TOY_AST_FLAG_MODULO_ASSIGN; + } + + //comparison + case TOY_TOKEN_OPERATOR_COMPARE_EQUAL: { + parsePrecedence(bucket, parser, root, PREC_COMPARISON + 1); + return TOY_AST_FLAG_COMPARE_EQUAL; + } + + case TOY_TOKEN_OPERATOR_COMPARE_NOT: { + parsePrecedence(bucket, parser, root, 
PREC_COMPARISON + 1); + return TOY_AST_FLAG_COMPARE_NOT; + } + + case TOY_TOKEN_OPERATOR_COMPARE_LESS: { + parsePrecedence(bucket, parser, root, PREC_COMPARISON + 1); + return TOY_AST_FLAG_COMPARE_LESS; + } + + case TOY_TOKEN_OPERATOR_COMPARE_LESS_EQUAL: { + parsePrecedence(bucket, parser, root, PREC_COMPARISON + 1); + return TOY_AST_FLAG_COMPARE_LESS_EQUAL; + } + + case TOY_TOKEN_OPERATOR_COMPARE_GREATER: { + parsePrecedence(bucket, parser, root, PREC_COMPARISON + 1); + return TOY_AST_FLAG_COMPARE_GREATER; + } + + case TOY_TOKEN_OPERATOR_COMPARE_GREATER_EQUAL: { + parsePrecedence(bucket, parser, root, PREC_COMPARISON + 1); + return TOY_AST_FLAG_COMPARE_GREATER_EQUAL; + } + + default: + printError(parser, parser->previous, "Unexpected token passed to binary precedence rule"); + Toy_private_emitAstError(bucket, root); //TODO: better error message here? + return TOY_AST_FLAG_NONE; + } +} + +static Toy_AstFlag group(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root) { + //groups are () + if (parser->previous.type == TOY_TOKEN_OPERATOR_PAREN_LEFT) { + parsePrecedence(bucket, parser, root, PREC_GROUP); + consume(parser, TOY_TOKEN_OPERATOR_PAREN_RIGHT, "Expected ')' at end of group"); + + Toy_private_emitAstGroup(bucket, root); + } + + else { + printError(parser, parser->previous, "Unexpected token passed to grouping precedence rule"); + Toy_private_emitAstError(bucket, root); //TODO: better error message here? 
+ } + + return TOY_AST_FLAG_NONE; +} + +static ParsingTuple* getParsingRule(Toy_TokenType type) { + return &parsingRulesetTable[type]; +} + +//grammar rules +static void parsePrecedence(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root, ParsingPrecedence precRule) { + //'step over' the token to parse + advance(parser); + + //every valid expression has a prefix rule + ParsingRule prefix = getParsingRule(parser->previous.type)->prefix; + + if (prefix == NULL) { + printError(parser, parser->previous, "Expected expression"); + Toy_private_emitAstError(bucket, root); //TODO: better error message here? + return; + } + + prefix(bucket, parser, root); + + //infix rules are left-recursive + while (precRule <= getParsingRule(parser->current.type)->precedence) { + ParsingRule infix = getParsingRule(parser->current.type)->infix; + + if (infix == NULL) { + printError(parser, parser->previous, "Expected operator"); + Toy_private_emitAstError(bucket, root); //TODO: better error message here? + return; + } + + Toy_Ast* ptr = NULL; + Toy_AstFlag flag = infix(bucket, parser, &ptr); + + //finished + if (flag == TOY_AST_FLAG_NONE) { + (*root) = ptr; + return; + } + + Toy_private_emitAstBinary(bucket, root, flag, ptr); + } + + //can't assign below a certain precedence + if (precRule <= PREC_ASSIGNMENT && match(parser, TOY_TOKEN_OPERATOR_ASSIGN)) { + printError(parser, parser->current, "Invalid assignment target"); + } +} + +static void makeExpr(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root) { + parsePrecedence(bucket, parser, root, PREC_ASSIGNMENT); +} + +static void makeExprStmt(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root) { + //check for empty lines + if (match(parser, TOY_TOKEN_OPERATOR_SEMICOLON)) { + Toy_private_emitAstPass(bucket, root); + return; + } + + makeExpr(bucket, parser, root); + consume(parser, TOY_TOKEN_OPERATOR_SEMICOLON, "Expected ';' at the end of expression statement"); +} + +static void makeStmt(Toy_Bucket** bucket, Toy_Parser* parser, 
Toy_Ast** root) { + //block + //print + //assert + //if-then-else + //while-then + //for-pre-clause-post-then + //break + //continue + //return + //import + + //default + makeExprStmt(bucket, parser, root); +} + +static void makeDeclarationStmt(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root) { + // //variable declarations + // if (match(parser, TOY_TOKEN_KEYWORD_VAR)) { + // makeVariableDeclarationStmt(bucket, parser, root); + // } + + // //function declarations + // else if (match(parser, TOY_TOKEN_KEYWORD_FUNCTION)) { + // makeFunctionDeclarationStmt(bucket, parser, root); + // } + + //otherwise + // else { + makeStmt(bucket, parser, root); + // } +} + +static void makeBlockStmt(Toy_Bucket** bucket, Toy_Parser* parser, Toy_Ast** root) { + //begin the block + Toy_private_initAstBlock(bucket, root); + + //read a series of statements into the block + while (!match(parser, TOY_TOKEN_EOF)) { + //process the grammar rules + Toy_Ast* stmt = NULL; + makeDeclarationStmt(bucket, parser, &stmt); + + //if something went wrong + if (parser->panic) { + synchronize(parser); + + Toy_Ast* err = NULL; + Toy_private_emitAstError(bucket, &err); //TODO: better error message here? + Toy_private_appendAstBlock(bucket, root, err); + + continue; + } + Toy_private_appendAstBlock(bucket, root, stmt); + } +} + +//exposed functions +void Toy_bindParser(Toy_Parser* parser, Toy_Lexer* lexer) { + Toy_resetParser(parser); + parser->lexer = lexer; +} + +Toy_Ast* Toy_scanParser(Toy_Bucket** bucket, Toy_Parser* parser) { + //check for EOF + if (match(parser, TOY_TOKEN_EOF)) { + return NULL; + } + + //TODO: better errors, check for unbound parser, etc. 
+ + Toy_Ast* root = NULL; + makeBlockStmt(bucket, parser, &root); + + return root; +} + +void Toy_resetParser(Toy_Parser* parser) { + parser->lexer = NULL; + + parser->current = TOY_BLANK_TOKEN(); + parser->previous = TOY_BLANK_TOKEN(); + + parser->error = false; + parser->panic = false; +} diff --git a/source/toy_parser.h b/source/toy_parser.h index e69de29..bdcf9b1 100644 --- a/source/toy_parser.h +++ b/source/toy_parser.h @@ -0,0 +1,21 @@ +#pragma once + +#include "toy_common.h" +#include "toy_memory.h" +#include "toy_lexer.h" +#include "toy_ast.h" + +typedef struct Toy_Parser { + Toy_Lexer* lexer; + + //last two outputs + Toy_Token current; + Toy_Token previous; + + bool error; + bool panic; //currently processing an error +} Toy_Parser; + +TOY_API void Toy_bindParser(Toy_Parser* parser, Toy_Lexer* lexer); +TOY_API Toy_Ast* Toy_scanParser(Toy_Bucket** bucket, Toy_Parser* parser); +TOY_API void Toy_resetParser(Toy_Parser* parser); diff --git a/source/toy_token_types.h b/source/toy_token_types.h index 0a80c44..f60eaba 100644 --- a/source/toy_token_types.h +++ b/source/toy_token_types.h @@ -32,6 +32,7 @@ typedef enum Toy_TokenType { TOY_TOKEN_KEYWORD_EXPORT, TOY_TOKEN_KEYWORD_FOR, TOY_TOKEN_KEYWORD_FOREACH, + TOY_TOKEN_KEYWORD_FUNCTION, TOY_TOKEN_KEYWORD_IF, TOY_TOKEN_KEYWORD_IMPORT, TOY_TOKEN_KEYWORD_IN, diff --git a/tests/cases/test_ast.c b/tests/cases/test_ast.c index e9b08e5..5e73c77 100644 --- a/tests/cases/test_ast.c +++ b/tests/cases/test_ast.c @@ -62,9 +62,8 @@ int test_type_emission() { //build the AST Toy_Ast* ast = NULL; - Toy_Ast* child = NULL; - Toy_private_emitAstValue(&bucket, &child, TOY_VALUE_TO_INTEGER(42)); - Toy_private_emitAstUnary(&bucket, &ast, TOY_AST_FLAG_NEGATE, child); + Toy_private_emitAstValue(&bucket, &ast, TOY_VALUE_TO_INTEGER(42)); + Toy_private_emitAstUnary(&bucket, &ast, TOY_AST_FLAG_NEGATE); //check if it worked if ( @@ -90,11 +89,10 @@ int test_type_emission() { //build the AST Toy_Ast* ast = NULL; - Toy_Ast* left = NULL; 
Toy_Ast* right = NULL; - Toy_private_emitAstValue(&bucket, &left, TOY_VALUE_TO_INTEGER(42)); + Toy_private_emitAstValue(&bucket, &ast, TOY_VALUE_TO_INTEGER(42)); Toy_private_emitAstValue(&bucket, &right, TOY_VALUE_TO_INTEGER(69)); - Toy_private_emitAstBinary(&bucket, &ast, TOY_AST_FLAG_ADD, left, right); + Toy_private_emitAstBinary(&bucket, &ast, TOY_AST_FLAG_ADD, right); //check if it worked if ( @@ -122,13 +120,11 @@ int test_type_emission() { //build the AST Toy_Ast* ast = NULL; - Toy_Ast* addition = NULL; - Toy_Ast* left = NULL; Toy_Ast* right = NULL; - Toy_private_emitAstValue(&bucket, &left, TOY_VALUE_TO_INTEGER(42)); + Toy_private_emitAstValue(&bucket, &ast, TOY_VALUE_TO_INTEGER(42)); Toy_private_emitAstValue(&bucket, &right, TOY_VALUE_TO_INTEGER(69)); - Toy_private_emitAstBinary(&bucket, &addition, TOY_AST_FLAG_ADD, left, right); - Toy_private_emitAstGroup(&bucket, &ast, addition); + Toy_private_emitAstBinary(&bucket, &ast, TOY_AST_FLAG_ADD, right); + Toy_private_emitAstGroup(&bucket, &ast); //check if it worked if ( @@ -164,13 +160,11 @@ int test_type_emission() { for (int i = 0; i < 5; i++) { //build the AST Toy_Ast* ast = NULL; - Toy_Ast* addition = NULL; - Toy_Ast* left = NULL; Toy_Ast* right = NULL; - Toy_private_emitAstValue(&bucket, &left, TOY_VALUE_TO_INTEGER(42)); + Toy_private_emitAstValue(&bucket, &ast, TOY_VALUE_TO_INTEGER(42)); Toy_private_emitAstValue(&bucket, &right, TOY_VALUE_TO_INTEGER(69)); - Toy_private_emitAstBinary(&bucket, &addition, TOY_AST_FLAG_ADD, left, right); - Toy_private_emitAstGroup(&bucket, &ast, addition); + Toy_private_emitAstBinary(&bucket, &ast, TOY_AST_FLAG_ADD, right); + Toy_private_emitAstGroup(&bucket, &ast); Toy_private_appendAstBlock(&bucket, &block, ast); } diff --git a/tests/cases/test_parser.c b/tests/cases/test_parser.c new file mode 100644 index 0000000..f2c2b89 --- /dev/null +++ b/tests/cases/test_parser.c @@ -0,0 +1,9 @@ +#include "toy_parser.h" +#include "toy_console_colors.h" + +#include <stdio.h> + +int main() { 
fprintf(stderr, TOY_CC_WARN "WARNING: Test suite for Toy_Parser is not yet implemented\n" TOY_CC_RESET); + return 0; +}