From 1ff32fe101af22cc1114e8a183f7f052eaeb8c3d Mon Sep 17 00:00:00 2001 From: Kayne Ruse Date: Wed, 3 Aug 2022 14:06:54 +0100 Subject: [PATCH] Lexer partially working --- scripts/example.toy | 4 + scripts/test.toy | 1 - source/debug.c | 23 --- source/debug.h | 3 - source/lexer.c | 23 +++ source/lexer.h | 1 + source/literal.c | 59 ++++++++ source/literal.h | 74 +++++++++ source/memory.c | 22 +++ source/memory.h | 12 ++ source/node.c | 53 +++++++ source/node.h | 43 ++++++ source/opcodes.h | 14 ++ source/parser.c | 349 +++++++++++++++++++++++++++++++++++++++++++ source/parser.h | 21 +++ source/repl_main.c | 18 ++- source/token_types.h | 1 - 17 files changed, 687 insertions(+), 34 deletions(-) create mode 100644 scripts/example.toy delete mode 100644 scripts/test.toy create mode 100644 source/literal.c create mode 100644 source/literal.h create mode 100644 source/memory.c create mode 100644 source/memory.h create mode 100644 source/node.c create mode 100644 source/node.h create mode 100644 source/opcodes.h create mode 100644 source/parser.c create mode 100644 source/parser.h diff --git a/scripts/example.toy b/scripts/example.toy new file mode 100644 index 0000000..c265ca3 --- /dev/null +++ b/scripts/example.toy @@ -0,0 +1,4 @@ +print "hello world"; +print null; +print true; +print false; diff --git a/scripts/test.toy b/scripts/test.toy deleted file mode 100644 index c71d05d..0000000 --- a/scripts/test.toy +++ /dev/null @@ -1 +0,0 @@ -print "Hello world"; \ No newline at end of file diff --git a/source/debug.c b/source/debug.c index bb24956..7aecd8b 100644 --- a/source/debug.c +++ b/source/debug.c @@ -6,29 +6,6 @@ #include #include -void printToken(Token* token) { - if (token->type == TOKEN_ERROR) { - printf("Error\t%d\t%.*s\n", token->line, token->length, token->lexeme); - return; - } - - printf("\t%d\t%d\t", token->type, token->line); - - if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_LITERAL_INTEGER || token->type == TOKEN_LITERAL_FLOAT || token->type == TOKEN_LITERAL_STRING) { - printf("%.*s\t", token->length, token->lexeme); - } else { - char* keyword = findKeywordByType(token->type); - - if (keyword != NULL) { - printf("%s", keyword); - } else { - printf("-"); - } - } - - printf("\n"); -} - //declare the singleton Command command; diff --git a/source/debug.h b/source/debug.h index b0dd542..7dc7ad2 100644 --- a/source/debug.h +++ b/source/debug.h @@ -1,9 +1,6 @@ #pragma once #include "common.h" -#include "lexer.h" - -void printToken(Token* token); //for processing the command line arguments typedef struct { diff --git a/source/lexer.c b/source/lexer.c index 6990fb3..267a41e 100644 --- a/source/lexer.c +++ b/source/lexer.c @@ -294,4 +294,27 @@ Token scanLexer(Lexer* lexer) { default: return makeErrorToken(lexer, "Unexpected token"); } +} + +void printToken(Token* token) { + if (token->type == TOKEN_ERROR) { + printf("Error\t%d\t%.*s\n", token->line, token->length, token->lexeme); + return; + } + + printf("\t%d\t%d\t", token->type, token->line); + + if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_LITERAL_INTEGER || token->type == TOKEN_LITERAL_FLOAT || token->type == TOKEN_LITERAL_STRING) { + printf("%.*s\t", token->length, token->lexeme); + } else { + char* keyword = findKeywordByType(token->type); + + if (keyword != NULL) { + printf("%s", keyword); + } else { + printf("-"); + } + } + + printf("\n"); } \ No newline at end of file diff --git a/source/lexer.h b/source/lexer.h index ee5156f..74595a4 100644 --- a/source/lexer.h +++ b/source/lexer.h @@ -22,3 +22,4 @@ typedef struct { void initLexer(Lexer* lexer, char* source); Token scanLexer(Lexer* lexer); +void printToken(Token* token); \ No newline at end of file diff --git a/source/literal.c b/source/literal.c new file mode 100644 index 0000000..3ec5570 --- /dev/null +++ b/source/literal.c @@ -0,0 +1,59 @@ +#include "literal.h" +#include "memory.h" +#include "debug.h" + +#include +#include + +void printLiteral(Literal literal) { + switch(literal.type) { + case LITERAL_NULL: + printf("null\n"); + break; + + case LITERAL_BOOLEAN: + printf(AS_BOOLEAN(literal) ? "true\n" : "false\n"); + break; + + case LITERAL_INTEGER: + printf("%d\n", AS_INTEGER(literal)); + break; + + case LITERAL_FLOAT: + printf("%g\n", AS_FLOAT(literal)); + break; + + case LITERAL_STRING: + printf("%.*s (%d)\n", STRLEN(literal), AS_STRING(literal), STRLEN(literal)); + break; + + case LITERAL_FUNCTION: + printf("\n"); + break; + + default: + //should never bee seen + fprintf(stderr, "[Internal] Unrecognized literal type: %d", literal.type); + } +} + +void freeLiteral(Literal literal) { + if (IS_STRING(literal)) { + FREE(char, AS_STRING(literal)); + return; + } +} + +bool _isTruthy(Literal x) { + return (IS_NULL(x) || (IS_BOOLEAN(x) && AS_BOOLEAN(x)) || (IS_INTEGER(x) && AS_INTEGER(x) != 0) || (IS_FLOAT(x) && AS_FLOAT(x) != 0)); +} + +Literal _toStringLiteral(char* cstr) { + return ((Literal){LITERAL_STRING, { .string.ptr = (char*)cstr, .string.length = strlen((char*)cstr) }}); +} + +char* copyString(char* original, int length) { + char* buffer = ALLOCATE(char, length + 1); + strncpy(buffer, original, length); + return buffer; +} \ No newline at end of file diff --git a/source/literal.h b/source/literal.h new file mode 100644 index 0000000..9c4d881 --- /dev/null +++ b/source/literal.h @@ -0,0 +1,74 @@ +#pragma once + +#include "common.h" + +#include + +typedef enum { + LITERAL_NULL, + LITERAL_BOOLEAN, + LITERAL_INTEGER, + LITERAL_FLOAT, + LITERAL_STRING, + LITERAL_ARRAY, + LITERAL_DICTIONARY, + LITERAL_FUNCTION, +} LiteralType; + +typedef struct { + LiteralType type; + union { + bool boolean; + int integer; + float number; + struct { + char* ptr; + int length; //could possibly cut it down further by removing this + } string; + + // //experimental + // void* array; + // void* dictionary; + // void* function; + } as; +} Literal; + +#define IS_NULL(value) ((value).type == LITERAL_NULL) +#define IS_BOOLEAN(value) ((value).type == LITERAL_BOOLEAN) +#define IS_INTEGER(value) ((value).type == LITERAL_INTEGER) +#define IS_FLOAT(value) ((value).type == LITERAL_FLOAT) +#define IS_STRING(value) ((value).type == LITERAL_STRING) +#define IS_ARRAY(value) ((value).type == LITERAL_ARRAY) +#define IS_DICTIONARY(value) ((value).type == LITERAL_DICTIONARY) +#define IS_FUNCTION(value) ((value).type == LITERAL_FUNCTION) + +#define AS_BOOLEAN(value) ((value).as.boolean) +#define AS_INTEGER(value) ((value).as.integer) +#define AS_FLOAT(value) ((value).as.number) +#define AS_STRING(value) ((value).as.string.ptr) +// #define AS_ARRAY_PTR(value) +// #define AS_DICTIONARY_PTR(value) +// #define AS_FUNCTION_PTR(value) ((Function*)((value).as.function)) + +#define TO_NULL_LITERAL ((Literal){LITERAL_NULL, { .integer = 0 }}) +#define TO_BOOLEAN_LITERAL(value) ((Literal){LITERAL_BOOLEAN, { .boolean = value }}) +#define TO_INTEGER_LITERAL(value) ((Literal){LITERAL_INTEGER, { .integer = value }}) +#define TO_FLOAT_LITERAL(value) ((Literal){LITERAL_FLOAT, { .number = value }}) +#define TO_STRING_LITERAL(value) _toStringLiteral(value) +// #define TO_ARRAY_PTR +// #define TO_DICTIONARY_PTR +// #define TO_FUNCTION_PTR(value) ((Literal){LITERAL_FUNCTION, { .function = (Function*)value }}) + +void printLiteral(Literal literal); +void freeLiteral(Literal literal); + +#define IS_TRUTHY(x) _isTruthy(x) + +#define STRLEN(lit) ((lit).as.string.length) + +//BUGFIX: macros are not functions +bool _isTruthy(Literal x); +Literal _toStringLiteral(char* cstr); + +//utils +char* copyString(char* original, int length); \ No newline at end of file diff --git a/source/memory.c b/source/memory.c new file mode 100644 index 0000000..ac7c528 --- /dev/null +++ b/source/memory.c @@ -0,0 +1,22 @@ +#include "memory.h" + +#include +#include + +void* reallocate(void* pointer, size_t oldSize, size_t newSize) { + if (newSize == 0) { + free(pointer); + + return NULL; + } + + void* mem = realloc(pointer, newSize); + + if (mem == NULL) { + fprintf(stderr, "[Internal]Memory allocation error (requested %d for %d, replacing %d)\n", (int)newSize, (int)pointer, (int)oldSize); + exit(-1); + } + + return mem; +} + diff --git a/source/memory.h b/source/memory.h new file mode 100644 index 0000000..30e5dcc --- /dev/null +++ b/source/memory.h @@ -0,0 +1,12 @@ +#pragma once + +#include "common.h" + +#define ALLOCATE(type, count) ((type*)reallocate(NULL, 0, sizeof(type) * (count))) +#define FREE(type, pointer) reallocate(pointer, sizeof(type), 0) +#define GROW_CAPACITY(capacity) ((capacity) < 8 ? 8 : (capacity) * 2) +#define GROW_ARRAY(type, pointer, oldCount, count) (type*)reallocate(pointer, sizeof(type) * (oldCount), sizeof(type) * (count)) +#define FREE_ARRAY(type, pointer, oldCount) reallocate(pointer, sizeof(type) * (oldCount), 0) + +void* reallocate(void* pointer, size_t oldSize, size_t newSize); + diff --git a/source/node.c b/source/node.c new file mode 100644 index 0000000..37b9596 --- /dev/null +++ b/source/node.c @@ -0,0 +1,53 @@ +#include "node.h" + +#include "memory.h" + +#include + +void freeNode(Node* node) { + switch(node->type) { + case NODE_ATOMIC: + freeLiteral(node->atomic.literal); + break; + + case NODE_UNARY: + freeNode(node->unary.child); + break; + + case NODE_BINARY: + freeNode(node->binary.left); + freeNode(node->binary.right); + break; + } + + FREE(Node, node); +} + +void emitAtomicLiteral(Node** nodeHandle, Literal literal) { + //allocate a new node + *nodeHandle = ALLOCATE(Node, 1); + + (*nodeHandle)->type = NODE_ATOMIC; + (*nodeHandle)->atomic.literal = literal; +} + +void printNode(Node* node) { + switch(node->type) { + case NODE_ATOMIC: + printf("atomic:"); + printLiteral(node->atomic.literal); + break; + + case NODE_UNARY: + printf("unary:"); + printNode(node->unary.child); + break; + + case NODE_BINARY: + printf("binary left:"); + printNode(node->binary.left); + printf("binary right:"); + printNode(node->binary.right); + break; + } +} \ No newline at end of file diff --git a/source/node.h b/source/node.h new file mode 100644 index 0000000..3b3bc86 --- /dev/null +++ b/source/node.h @@ -0,0 +1,43 @@ +#pragma once + +#include "opcodes.h" +#include "literal.h" + +//nodes are the intermediaries between parsers and compilers +typedef union _node Node; + +typedef enum NodeType { + NODE_ATOMIC, //a simple value + NODE_UNARY, //one child + NODE_BINARY, //two children, left and right + // NODE_GROUPING, +} NodeType; + +typedef struct NodeAtomic { + NodeType type; + Literal literal; +} NodeAtomic; + +typedef struct NodeUnary { + NodeType type; + Node* child; +} NodeUnary; + +typedef struct NodeBinary { + NodeType type; + Node* left; + Node* right; +} NodeBinary; + +union _node { + NodeType type; + NodeAtomic atomic; + NodeUnary unary; + NodeBinary binary; +}; + +void freeNode(Node* node); +void emitAtomicLiteral(Node** nodeHandle, Literal literal); + +void printNode(Node* node); + diff --git a/source/opcodes.h b/source/opcodes.h new file mode 100644 index 0000000..4ef776c --- /dev/null +++ b/source/opcodes.h @@ -0,0 +1,14 @@ +#pragma once + +typedef enum Opcode { + OP_EOF, + + //basic operations + OP_PRINT, + + //data + OP_LITERAL, + + //TODO: add more +} Opcode; + diff --git a/source/parser.c b/source/parser.c new file mode 100644 index 0000000..b26ff3d --- /dev/null +++ b/source/parser.c @@ -0,0 +1,349 @@ +#include "parser.h" +#include "common.h" + +#include "memory.h" +#include "literal.h" + +#include + +//utility functions +static void error(Parser* parser, Token token, const char* message) { + //keep going while panicing + if (parser->panic) return; + + fprintf(stderr, "[Line %d] Error", token.line); + + //check type + if (token.type == TOKEN_EOF) { + fprintf(stderr, " at end"); + } + + else { + fprintf(stderr, " at '%.*s'", token.length, token.lexeme); + } + + //finally + fprintf(stderr, ": %s\n", message); + parser->error = true; + parser->panic = true; +} + +static void advance(Parser* parser) { + parser->previous = parser->current; + parser->current = scanLexer(parser->lexer); + + if (parser->current.type == TOKEN_ERROR) { + error(parser, parser->current, "Lexer error"); + } +} + +static bool match(Parser* parser, TokenType tokenType) { + if (parser->current.type == tokenType) { + advance(parser); + return true; + } + return false; +} + +static void consume(Parser* parser, TokenType tokenType, const char* msg) { + if (parser->current.type != tokenType) { + error(parser, parser->current, msg); + return; + } + + advance(parser); +} + +static void synchronize(Parser* parser) { + while (parser->current.type != TOKEN_EOF) { + switch(parser->current.type) { + //these tokens can start a line + case TOKEN_ASSERT: + case TOKEN_BREAK: + case TOKEN_CONST: + case TOKEN_CONTINUE: + case TOKEN_DO: + case TOKEN_EXPORT: + case TOKEN_FOR: + case TOKEN_FOREACH: + case TOKEN_IF: + case TOKEN_IMPORT: + case TOKEN_PRINT: + case TOKEN_RETURN: + case TOKEN_VAR: + case TOKEN_WHILE: + parser->panic = false; + return; + + default: + advance(parser); + } + } +} + +//the pratt table collates the precedence rules +typedef enum { + PREC_NONE, + PREC_ASSIGNMENT, + PREC_TERNARY, + PREC_OR, + PREC_AND, + PREC_EQUALITY, + PREC_COMPARISON, + PREC_TERM, + PREC_FACTOR, + PREC_UNARY, + PREC_CALL, + PREC_PRIMARY, +} PrecedenceRule; + +typedef void (*ParseFn)(Parser* parser, Node** nodeHandle, bool canBeAssigned); + +typedef struct { + ParseFn prefix; + ParseFn infix; + PrecedenceRule precedence; +} ParseRule; + +ParseRule parseRules[]; + +//forward declarations +static void parsePrecedence(Parser* parser, Node** nodeHandle, PrecedenceRule rule); + +//the atomic expression rules +static void string(Parser* parser, Node** nodeHandle, bool canBeAssigned) { + //handle strings + switch(parser->previous.type) { + case TOKEN_LITERAL_STRING: + emitAtomicLiteral(nodeHandle, TO_STRING_LITERAL(copyString(parser->previous.lexeme, parser->previous.length))); + break; + + //TODO: interpolated strings + + default: + error(parser, parser->previous, "Unexpected token passed to string precedence rule"); + } +} + +static void binary(Parser* parser, Node** nodeHandle, bool canBeAssigned) { + //TODO +} + +static void unary(Parser* parser, Node** nodeHandle, bool canBeAssigned) { + //TODO +} + +static void atomic(Parser* parser, Node** nodeHandle, bool canBeAssigned) { + switch(parser->previous.type) { + case TOKEN_NULL: + emitAtomicLiteral(nodeHandle, TO_NULL_LITERAL); + break; + + case TOKEN_LITERAL_TRUE: + emitAtomicLiteral(nodeHandle, TO_BOOLEAN_LITERAL(true)); + break; + + case TOKEN_LITERAL_FALSE: + emitAtomicLiteral(nodeHandle, TO_BOOLEAN_LITERAL(false)); + break; + + default: + error(parser, parser->previous, "Unexpected token passed to atomic precedence rule"); + } +} + +ParseRule parseRules[] = { //must match the token types + //types + {atomic, NULL, PREC_NONE},// TOKEN_NULL, + {NULL, NULL, PREC_NONE},// TOKEN_BOOLEAN, + {NULL, NULL, PREC_NONE},// TOKEN_INTEGER, + {NULL, NULL, PREC_NONE},// TOKEN_FLOAT, + {NULL, NULL, PREC_NONE},// TOKEN_STRING, + {NULL, NULL, PREC_NONE},// TOKEN_ARRAY, + {NULL, NULL, PREC_NONE},// TOKEN_DICTIONARY, + {NULL, NULL, PREC_NONE},// TOKEN_FUNCTION, + {NULL, NULL, PREC_NONE},// TOKEN_ANY, + + //keywords and reserved words + {NULL, NULL, PREC_NONE},// TOKEN_AS, + {NULL, NULL, PREC_NONE},// TOKEN_ASSERT, + {NULL, NULL, PREC_NONE},// TOKEN_BREAK, + {NULL, NULL, PREC_NONE},// TOKEN_CLASS, + {NULL, NULL, PREC_NONE},// TOKEN_CONST, + {NULL, NULL, PREC_NONE},// TOKEN_CONTINUE, + {NULL, NULL, PREC_NONE},// TOKEN_DO, + {NULL, NULL, PREC_NONE},// TOKEN_ELSE, + {NULL, NULL, PREC_NONE},// TOKEN_EXPORT, + {NULL, NULL, PREC_NONE},// TOKEN_FOR, + {NULL, NULL, PREC_NONE},// TOKEN_FOREACH, + {NULL, NULL, PREC_NONE},// TOKEN_IF, + {NULL, NULL, PREC_NONE},// TOKEN_IMPORT, + {NULL, NULL, PREC_NONE},// TOKEN_IN, + {NULL, NULL, PREC_NONE},// TOKEN_OF, + {NULL, NULL, PREC_NONE},// TOKEN_PRINT, + {NULL, NULL, PREC_NONE},// TOKEN_RETURN, + {NULL, NULL, PREC_NONE},// TOKEN_USING, + {NULL, NULL, PREC_NONE},// TOKEN_VAR, + {NULL, NULL, PREC_NONE},// TOKEN_WHILE, + + //literal values + {NULL, NULL, PREC_NONE},// TOKEN_IDENTIFIER, + {atomic, NULL, PREC_NONE},// TOKEN_LITERAL_TRUE, + {atomic, NULL, PREC_NONE},// TOKEN_LITERAL_FALSE, + {NULL, NULL, PREC_NONE},// TOKEN_LITERAL_INTEGER, + {NULL, NULL, PREC_NONE},// TOKEN_LITERAL_FLOAT, + {string, NULL, PREC_PRIMARY},// TOKEN_LITERAL_STRING, + + //math operators + {NULL, NULL, PREC_NONE},// TOKEN_PLUS, + {NULL, NULL, PREC_NONE},// TOKEN_MINUS, + {NULL, NULL, PREC_NONE},// TOKEN_MULTIPLY, + {NULL, NULL, PREC_NONE},// TOKEN_DIVIDE, + {NULL, NULL, PREC_NONE},// TOKEN_MODULO, + {NULL, NULL, PREC_NONE},// TOKEN_PLUS_ASSIGN, + {NULL, NULL, PREC_NONE},// TOKEN_MINUS_ASSIGN, + {NULL, NULL, PREC_NONE},// TOKEN_MULTIPLY_ASSIGN, + {NULL, NULL, PREC_NONE},// TOKEN_DIVIDE_ASSIGN, + {NULL, NULL, PREC_NONE},// TOKEN_MODULO_ASSIGN, + {NULL, NULL, PREC_NONE},// TOKEN_PLUS_PLUS, + {NULL, NULL, PREC_NONE},// TOKEN_MINUS_MINUS, + + //logical operators + {NULL, NULL, PREC_NONE},// TOKEN_PAREN_LEFT, + {NULL, NULL, PREC_NONE},// TOKEN_PAREN_RIGHT, + {NULL, NULL, PREC_NONE},// TOKEN_BRACKET_LEFT, + {NULL, NULL, PREC_NONE},// TOKEN_BRACKET_RIGHT, + {NULL, NULL, PREC_NONE},// TOKEN_BRACE_LEFT, + {NULL, NULL, PREC_NONE},// TOKEN_BRACE_RIGHT, + {NULL, NULL, PREC_NONE},// TOKEN_NOT, + {NULL, NULL, PREC_NONE},// TOKEN_NOT_EQUAL, + {NULL, NULL, PREC_NONE},// TOKEN_EQUAL, + {NULL, NULL, PREC_NONE},// TOKEN_LESS, + {NULL, NULL, PREC_NONE},// TOKEN_GREATER, + {NULL, NULL, PREC_NONE},// TOKEN_LESS_EQUAL, + {NULL, NULL, PREC_NONE},// TOKEN_GREATER_EQUAL, + {NULL, NULL, PREC_NONE},// TOKEN_AND, + {NULL, NULL, PREC_NONE},// TOKEN_OR, + + //other operators + {NULL, NULL, PREC_NONE},// TOKEN_ASSIGN, + {NULL, NULL, PREC_NONE},// TOKEN_COLON, + {NULL, NULL, PREC_NONE},// TOKEN_SEMICOLON, + {NULL, NULL, PREC_NONE},// TOKEN_COMMA, + {NULL, NULL, PREC_NONE},// TOKEN_DOT, + {NULL, NULL, PREC_NONE},// TOKEN_PIPE, + {NULL, NULL, PREC_NONE},// TOKEN_REST, + + //meta tokens + {NULL, NULL, PREC_NONE},// TOKEN_PASS, + {NULL, NULL, PREC_NONE},// TOKEN_ERROR, + {NULL, NULL, PREC_NONE},// TOKEN_EOF, +}; + +ParseRule* getRule(TokenType type) { + return &parseRules[type]; +} + +static void parsePrecedence(Parser* parser, Node** nodeHandle, PrecedenceRule rule) { + //every expression has a prefix rule + advance(parser); + ParseFn prefixRule = getRule(parser->previous.type)->prefix; + + if (prefixRule == NULL) { + error(parser, parser->previous, "Expected expression"); + return; + } + + bool canBeAssigned = rule <= PREC_ASSIGNMENT; + prefixRule(parser, nodeHandle, canBeAssigned); + + //infix rules are left-recursive + while (rule <= getRule(parser->current.type)->precedence) { + ParseFn infixRule = getRule(parser->current.type)->infix; + + if (infixRule == NULL) { + error(parser, parser->current, "Expected operator"); + return; + } + + infixRule(parser, nodeHandle, canBeAssigned); //NOTE: infix rule must advance the parser + } + + //if your precedence is below "assignment" + if (canBeAssigned && match(parser, TOKEN_ASSIGN)) { + error(parser, parser->current, "Invalid assignment target"); + } +} + +//expressions +static void expression(Parser* parser, Node** nodeHandle) { + //delegate to the pratt table for expression precedence + parsePrecedence(parser, nodeHandle, PREC_ASSIGNMENT); +} + +//statements +static void printStmt(Parser* parser, Node* node) { + int line = parser->previous.line; + + //set the node info + node->type = NODE_UNARY; + node->unary.child = ALLOCATE(Node, 1); + expression(parser, &(node->unary.child)); + + consume(parser, TOKEN_SEMICOLON, "Expected ';' at end of print statement"); +} + +//precedence functions +static void expressionStmt(Parser* parser, Node* node) { + error(parser, parser->previous, "Expression statements not yet implemented"); +} + +static void statement(Parser* parser, Node* node) { + //print + if (match(parser, TOKEN_PRINT)) { + printStmt(parser, node); + return; + } + + //default + expressionStmt(parser, node); +} + +static void declaration(Parser* parser, Node* node) { + statement(parser, node); + + if (parser->panic) { + synchronize(parser); + } +} + +//exposed functions +void initParser(Parser* parser, Lexer* lexer) { + parser->lexer = lexer; + parser->error = false; + parser->panic = false; + + parser->previous.type = TOKEN_NULL; + parser->current.type = TOKEN_NULL; + advance(parser); +} + +void freeParser(Parser* parser) { + initParser(parser, NULL); +} + +Node* scanParser(Parser* parser) { + //check for EOF + if (match(parser, TOKEN_EOF)) { + return NULL; + } + + //returns nodes on the heap + Node* node = ALLOCATE(Node, 1); + + //process the grammar rule for this line + declaration(parser, node); + + return node; +} + diff --git a/source/parser.h b/source/parser.h new file mode 100644 index 0000000..6559d1d --- /dev/null +++ b/source/parser.h @@ -0,0 +1,21 @@ +#pragma once + +#include "parser.h" + +#include "lexer.h" +#include "node.h" + +//DOCS: parsers are bound to a lexer, and turn the outputted tokens into AST nodes +typedef struct { + Lexer* lexer; + bool error; //I've had an error + bool panic; //I am processing an error + + //track the last two outputs from the lexer + Token current; + Token previous; +} Parser; + +void initParser(Parser* parser, Lexer* lexer); +void freeParser(Parser* parser); +Node* scanParser(Parser* parser); diff --git a/source/repl_main.c b/source/repl_main.c index 8baf422..d946772 100644 --- a/source/repl_main.c +++ b/source/repl_main.c @@ -1,7 +1,7 @@ #include "debug.h" #include "lexer.h" -//-#include "parser.h" +#include "parser.h" //#include "toy.h" #include @@ -130,16 +130,22 @@ void repl() { void debug() { Lexer lexer; - Token token; + Parser parser; char* source = readFile(command.filename); initLexer(&lexer, source); + initParser(&parser, &lexer); - //run the lexer until the end of the source - do { - token = scanLexer(&lexer); - } while(token.type != TOKEN_EOF); + //run the parser until the end of the source + Node* node = scanParser(&parser); + while(node != NULL) { + printNode(node); + + freeNode(node); + + node = scanParser(&parser); + } } //entry point diff --git a/source/token_types.h b/source/token_types.h index f5d33c3..2f04a36 100644 --- a/source/token_types.h +++ b/source/token_types.h @@ -36,7 +36,6 @@ typedef enum TokenType { //literal values TOKEN_IDENTIFIER, - TOKEN_LITERAL_NULL, TOKEN_LITERAL_TRUE, TOKEN_LITERAL_FALSE, TOKEN_LITERAL_INTEGER,