From 6a883bde963ac24916c0227a31cbfd9f658220eb Mon Sep 17 00:00:00 2001 From: Kayne Ruse Date: Wed, 10 Aug 2022 11:01:32 +0100 Subject: [PATCH] Parser is reading variable declarations, read more This is an incomplete process. It's supposed to be robust enough to support the types of arrays and dictionaries, but arrays and dictionaries aren't implemented in the literals yet, so that's my next task. I'll come back to variable declarations later. --- source/compiler.c | 10 +++- source/lexer.c | 5 +- source/literal.c | 6 +-- source/literal.h | 6 +-- source/node.c | 57 ++++++++++++++++++++++ source/node.h | 23 ++++++++- source/opcodes.h | 8 +++- source/parser.c | 118 ++++++++++++++++++++++++++++++++++++++++++++-- source/scope.h | 2 +- 9 files changed, 218 insertions(+), 17 deletions(-) diff --git a/source/compiler.c b/source/compiler.c index e102ab8..f4b5b10 100644 --- a/source/compiler.c +++ b/source/compiler.c @@ -90,7 +90,15 @@ void writeCompiler(Compiler* compiler, Node* node) { compiler->bytecode[compiler->count++] = (unsigned char)OP_SCOPE_END; //1 byte break; - //TODO: conditional + case NODE_VAR_TYPES: + //TODO: OP_TYPE_DECL + break; + + case NODE_VAR_DECL: + //TODO: OP_VAR_DECL + OP_VAR_ASSIGN + break; + + //TODO: more } } diff --git a/source/lexer.c b/source/lexer.c index 5dd6e62..1c96181 100644 --- a/source/lexer.c +++ b/source/lexer.c @@ -301,7 +301,6 @@ Token scanLexer(Lexer* lexer) { } static void trim(char** s, int* l) { //all this to remove a newline? 
- *l = strlen(*s); while( isspace(( (*((unsigned char**)(s)))[(*l) - 1] )) ) (*l)--; while(**s && isspace( **(unsigned char**)(s)) ) { (*s)++; (*l)--; } } @@ -323,9 +322,9 @@ void printToken(Token* token) { printf("%s", keyword); } else { char* str = token->lexeme; - int length = 0; + int length = token->length; trim(&str, &length); - printf("%.*s", length, token->lexeme); + printf("%.*s", length, str); } } diff --git a/source/literal.c b/source/literal.c index 17c5a3c..2d77d3b 100644 --- a/source/literal.c +++ b/source/literal.c @@ -71,8 +71,8 @@ Literal _toStringLiteral(char* str) { return ((Literal){LITERAL_STRING, {.string.ptr = (char*)str, .string.length = strlen((char*)str)}}); } -Literal _toIdentifierLiteral(char* str, unsigned char types) { - return ((Literal){LITERAL_IDENTIFIER,{.identifier.ptr = (char*)str,.identifier.length = strlen((char*)str),.identifier.types = types}}); +Literal _toIdentifierLiteral(char* str) { + return ((Literal){LITERAL_IDENTIFIER,{.identifier.ptr = (char*)str,.identifier.length = strlen((char*)str)}}); } char* copyString(char* original, int length) { @@ -121,7 +121,7 @@ bool literalsAreEqual(Literal lhs, Literal rhs) { default: //should never bee seen - fprintf(stderr, "[Internal] Unrecognized literal type: %d\n", lhs.type); + fprintf(stderr, "[internal] Unrecognized literal type: %d\n", lhs.type); return false; } } diff --git a/source/literal.h b/source/literal.h index e5b13ee..fa55909 100644 --- a/source/literal.h +++ b/source/literal.h @@ -35,7 +35,6 @@ typedef struct { struct { //for variable names char* ptr; int length; - unsigned char types; } identifier; } as; } Literal; @@ -67,7 +66,7 @@ typedef struct { // #define TO_ARRAY_LITERAL // #define TO_DICTIONARY_LITERAL // #define TO_FUNCTION_LITERAL -#define TO_IDENTIFIER_LITERAL(value, types) _toIdentifierLiteral(value, types) +#define TO_IDENTIFIER_LITERAL(value) _toIdentifierLiteral(value) #define MASK(x) (1 >> (x)) #define TYPE_CONST 0 @@ -86,6 +85,7 @@ typedef struct { 
#define MASK_ARRAY (MASK(TYPE_ARRAY)) #define MASK_DICTIONARY (MASK(TYPE_DICTIONARY)) #define MASK_FUNCTION (MASK(TYPE_FUNCTION)) +#define MASK_ANY (MASK_BOOLEAN|MASK_INTEGER|MASK_FLOAT|MASK_STRING|MASK_ARRAY|MASK_DICTIONARY|MASK_FUNCTION) //utils void printLiteral(Literal literal); @@ -101,7 +101,7 @@ void freeLiteral(Literal literal); //BUGFIX: macros are not functions bool _isTruthy(Literal x); Literal _toStringLiteral(char* str); -Literal _toIdentifierLiteral(char* str, unsigned char types); +Literal _toIdentifierLiteral(char* str); //utils char* copyString(char* original, int length); diff --git a/source/node.c b/source/node.c index 0b14703..5a52c46 100644 --- a/source/node.c +++ b/source/node.c @@ -37,6 +37,20 @@ void freeNode(Node* node) { for (int i = 0; i < node->block.count; i++) { freeNode(node->block.nodes + i); } + //each sub-node gets freed individually + break; + + case NODE_VAR_TYPES: + for (int i = 0; i < node->varTypes.count; i++) { + freeNode(node->varTypes.nodes + i); + } + //each sub-node gets freed individually, indexed by i like the block case above + break; + + case NODE_VAR_DECL: + freeLiteral(node->varDecl.identifier); + freeNode(node->varDecl.varType); + freeNode(node->varDecl.expression); break; } @@ -91,6 +105,29 @@ void emitNodeBlock(Node** nodeHandle) { *nodeHandle = tmp; } +void emitNodeVarTypes(Node** nodeHandle, unsigned char mask) { //allocates a NODE_VAR_TYPES node carrying mask and an empty sub-node array, then stores it in *nodeHandle + Node* tmp = ALLOCATE(Node, 1); + + tmp->type = NODE_VAR_TYPES; + tmp->varTypes.mask = mask; + tmp->varTypes.nodes = NULL; + tmp->varTypes.capacity = 0; + tmp->varTypes.count = 0; + + *nodeHandle = tmp; +} + +void emitNodeVarDecl(Node** nodeHandle, Literal identifier, Node* varType, Node* expression) { //allocates a NODE_VAR_DECL node and stores it in *nodeHandle; freeNode() releases identifier, varType and expression for this node type + Node* tmp = ALLOCATE(Node, 1); + + tmp->type = NODE_VAR_DECL; + tmp->varDecl.identifier = identifier; + tmp->varDecl.varType = varType; + tmp->varDecl.expression = expression; + + *nodeHandle = tmp; +} + void printNode(Node* node) { if (node == NULL) { return; @@ -134,5 +171,25 @@ void printNode(Node* node) { printf("}\n"); break; + + case 
NODE_VAR_TYPES: + printf("[\n"); + + for (int i = 0; i < node->varTypes.count; i++) { + printNode(&(node->varTypes.nodes[i])); + } + + printf("]\n"); + break; + + case NODE_VAR_DECL: + printf("vardecl("); + printLiteral(node->varDecl.identifier); + printf("; "); + printNode(node->varDecl.varType); + printf("; "); + printNode(node->varDecl.expression); + printf(")"); + break; } } \ No newline at end of file diff --git a/source/node.h b/source/node.h index fb3b7a7..dd8fa65 100644 --- a/source/node.h +++ b/source/node.h @@ -12,7 +12,9 @@ typedef enum NodeType { NODE_UNARY, //one child NODE_BINARY, //two children, left and right NODE_GROUPING, //one child - NODE_BLOCK, //contains bytecode + NODE_BLOCK, //contains sub-node array + NODE_VAR_TYPES, //contains a type mask and a sub-node array for compound types + NODE_VAR_DECL, //contains identifier literal, typenode, expression definition // NODE_CONDITIONAL, //three children: conditional, then path, else path } NodeType; @@ -46,6 +48,21 @@ typedef struct NodeBlock { int count; } NodeBlock; +typedef struct NodeVarTypes { + NodeType type; + unsigned char mask; //bitwise OR of the MASK_* flags from literal.h + Node* nodes; //sub-type nodes, stored in the order the parser read them + int capacity; //number of slots allocated in nodes + int count; //NOTE(review): set to 0 by emitNodeVarTypes and not incremented anywhere in this patch - confirm before relying on it +} NodeVarTypes; + +typedef struct NodeVarDecl { + NodeType type; + Literal identifier; //name of the declared variable + Node* varType; //NULL when the declaration has no ": type" part + Node* expression; //NULL when the declaration has no initializer +} NodeVarDecl; + union _node { NodeType type; NodeLiteral atomic; @@ -53,6 +70,8 @@ union _node { NodeBinary binary; NodeGrouping grouping; NodeBlock block; + NodeVarTypes varTypes; + NodeVarDecl varDecl; }; void freeNode(Node* node); @@ -61,6 +80,8 @@ void emitNodeUnary(Node** nodeHandle, Opcode opcode); void emitNodeBinary(Node** nodeHandle, Node* rhs, Opcode opcode); void emitNodeGrouping(Node** nodeHandle); void emitNodeBlock(Node** nodeHandle); +void emitNodeVarTypes(Node** nodeHandle, unsigned char mask); +void emitNodeVarDecl(Node** nodeHandle, Literal identifier, Node* varType, Node* expression); void printNode(Node* node); diff --git a/source/opcodes.h b/source/opcodes.h index 
599fb0b..82d375a 100644 --- a/source/opcodes.h +++ b/source/opcodes.h @@ -11,7 +11,7 @@ typedef enum Opcode { OP_LITERAL, OP_LITERAL_LONG, //for more than 256 literals in a chunk - //operators + //arithmetic operators OP_NEGATE, OP_ADDITION, OP_SUBTRACTION, @@ -20,9 +20,15 @@ typedef enum Opcode { OP_MODULO, OP_GROUPING_BEGIN, OP_GROUPING_END, + + //variable stuff OP_SCOPE_BEGIN, OP_SCOPE_END, + OP_TYPE_DECL, //declare a compound type to be used + OP_VAR_DECL, //stack: literal name, literal type (referenced by array index) + OP_VAR_ASSIGN, //stack: literal name, literal value + //meta OP_SECTION_END, //TODO: add more diff --git a/source/parser.c b/source/parser.c index 935e185..63debbe 100644 --- a/source/parser.c +++ b/source/parser.c @@ -363,6 +363,7 @@ ParseRule* getRule(TokenType type) { return &parseRules[type]; } +//static analysis static bool calcStaticBinaryArithmetic(Node** nodeHandle) { switch((*nodeHandle)->binary.opcode) { case OP_ADDITION: @@ -479,7 +480,7 @@ static bool calcStaticBinaryArithmetic(Node** nodeHandle) { } static void parsePrecedence(Parser* parser, Node** nodeHandle, PrecedenceRule rule) { - //every expression has a prefix rule + //every valid expression has a prefix rule advance(parser); ParseFn prefixRule = getRule(parser->previous.type)->prefix; @@ -604,10 +605,119 @@ static void statement(Parser* parser, Node* node) { expressionStmt(parser, node); } -static void declaration(Parser* parser, Node** nodeHandle) { - //TODO: variable declarations +//declarations and definitions +static void readVarType(Parser* parser, Node** nodeHandle) { + //TODO: compound types with the "type" keyword + advance(parser); - statement(parser, *nodeHandle); + unsigned char typeMask = 0; + + Node* left = NULL; + Node* right = NULL; + + switch(parser->previous.type) { + case TOKEN_BOOLEAN: + typeMask |= MASK_BOOLEAN; + break; + + case TOKEN_INTEGER: + typeMask |= MASK_INTEGER; + break; + + case TOKEN_FLOAT: + typeMask |= MASK_FLOAT; + break; + + case 
TOKEN_STRING: + typeMask |= MASK_STRING; + break; + + //array, dictionary - read the sub-types + case TOKEN_BRACKET_LEFT: + //at least 1 type required + readVarType(parser, &left); + + if (match(parser, TOKEN_COMMA)) { + //if there's 2 types, it's a dictionary + readVarType(parser, &right); + typeMask |= MASK_DICTIONARY; + } + else { + //else it's just an array + typeMask |= MASK_ARRAY; + } + consume(parser, TOKEN_BRACKET_RIGHT, "Expected ']' at end of type definition"); + break; + + case TOKEN_ANY: + typeMask |= MASK_ANY; + break; + + //TODO: function + + default: + error(parser, parser->previous, "Bad type"); + return; + } + + //const follows the type + if (match(parser, TOKEN_CONST)) { + typeMask |= MASK_CONST; + } + + //generate the node + emitNodeVarTypes(nodeHandle, typeMask); + + //check for sub-nodes + if (left) { + int oldCapacity = (*nodeHandle)->varTypes.capacity; + + (*nodeHandle)->varTypes.capacity = GROW_CAPACITY(oldCapacity); + (*nodeHandle)->varTypes.nodes = GROW_ARRAY(Node, (*nodeHandle)->varTypes.nodes, oldCapacity, (*nodeHandle)->varTypes.capacity); + + //push left to the array + *((*nodeHandle)->varTypes.nodes) = *left; + + //append the other one too + if (right) { + *((*nodeHandle)->varTypes.nodes + 1) = *right; + } + } +} + +static void varDecl(Parser* parser, Node** nodeHandle) { + //read the identifier + consume(parser, TOKEN_IDENTIFIER, "Expected identifier after var keyword"); + Token identifierToken = parser->previous; + + //read the type, if present + Node* typeNode = NULL; + if (match(parser, TOKEN_COLON)) { + readVarType(parser, &typeNode); + } + + //variable definition is an expression + Node* expressionNode = NULL; + if (match(parser, TOKEN_ASSIGN)) { + expression(parser, &expressionNode); + } + + //TODO: compile-time static type check + + //finally - the lexeme is a length-delimited slice and _toIdentifierLiteral runs strlen() on its argument, so copy exactly token.length chars first (assumes copyString null-terminates - confirm) + emitNodeVarDecl(nodeHandle, TO_IDENTIFIER_LITERAL(copyString(identifierToken.lexeme, identifierToken.length)), typeNode, expressionNode); + + consume(parser, TOKEN_SEMICOLON, "Expected ';' at end of var declaration"); +} 
+ +static void declaration(Parser* parser, Node** nodeHandle) { //assume nodeHandle holds a blank node + //variable declarations - NOTE(review): varDecl() repoints *nodeHandle at a newly allocated node (via emitNodeVarDecl), while statement() is handed the node already held; confirm the caller still frees the blank node + if (match(parser, TOKEN_VAR)) { + varDecl(parser, nodeHandle); + } + else { + statement(parser, *nodeHandle); + } } //exposed functions diff --git a/source/scope.h b/source/scope.h index 3e21fb7..e449a35 100644 --- a/source/scope.h +++ b/source/scope.h @@ -4,7 +4,7 @@ #include "literal_dictionary.h" typedef struct Scope { - LiteralDictionary variables; + LiteralDictionary variables; //only allow identifiers as the keys struct Scope* ancestor; int references; //how many scopes point here } Scope;