Lexer partially working

This commit is contained in:
2022-08-03 14:06:54 +01:00
parent 3cad70dddd
commit 1ff32fe101
17 changed files with 687 additions and 34 deletions

4
scripts/example.toy Normal file
View File

@@ -0,0 +1,4 @@
print "hello world";
print null;
print true;
print false;

View File

@@ -1 +0,0 @@
print "Hello world";

View File

@@ -6,29 +6,6 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
void printToken(Token* token) {
if (token->type == TOKEN_ERROR) {
printf("Error\t%d\t%.*s\n", token->line, token->length, token->lexeme);
return;
}
printf("\t%d\t%d\t", token->type, token->line);
if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_LITERAL_INTEGER || token->type == TOKEN_LITERAL_FLOAT || token->type == TOKEN_LITERAL_STRING) {
printf("%.*s\t", token->length, token->lexeme);
} else {
char* keyword = findKeywordByType(token->type);
if (keyword != NULL) {
printf("%s", keyword);
} else {
printf("-");
}
}
printf("\n");
}
//declare the singleton //declare the singleton
Command command; Command command;

View File

@@ -1,9 +1,6 @@
#pragma once #pragma once
#include "common.h" #include "common.h"
#include "lexer.h"
void printToken(Token* token);
//for processing the command line arguments //for processing the command line arguments
typedef struct { typedef struct {

View File

@@ -294,4 +294,27 @@ Token scanLexer(Lexer* lexer) {
default: default:
return makeErrorToken(lexer, "Unexpected token"); return makeErrorToken(lexer, "Unexpected token");
} }
}
void printToken(Token* token) {
if (token->type == TOKEN_ERROR) {
printf("Error\t%d\t%.*s\n", token->line, token->length, token->lexeme);
return;
}
printf("\t%d\t%d\t", token->type, token->line);
if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_LITERAL_INTEGER || token->type == TOKEN_LITERAL_FLOAT || token->type == TOKEN_LITERAL_STRING) {
printf("%.*s\t", token->length, token->lexeme);
} else {
char* keyword = findKeywordByType(token->type);
if (keyword != NULL) {
printf("%s", keyword);
} else {
printf("-");
}
}
printf("\n");
} }

View File

@@ -22,3 +22,4 @@ typedef struct {
void initLexer(Lexer* lexer, char* source); void initLexer(Lexer* lexer, char* source);
Token scanLexer(Lexer* lexer); Token scanLexer(Lexer* lexer);
void printToken(Token* token);

59
source/literal.c Normal file
View File

@@ -0,0 +1,59 @@
#include "literal.h"
#include "memory.h"
#include "debug.h"
#include <stdio.h>
#include <string.h>
//print a human-readable form of a literal to stdout, one literal per line
//literal: the value to print, passed by value; nothing is freed or modified
//types without a case below are reported on stderr instead
void printLiteral(Literal literal) {
	switch(literal.type) {
		case LITERAL_NULL:
			printf("null\n");
			break;

		case LITERAL_BOOLEAN:
			//fixed text goes through fputs, so it is never interpreted as a format string
			fputs(AS_BOOLEAN(literal) ? "true\n" : "false\n", stdout);
			break;

		case LITERAL_INTEGER:
			printf("%d\n", AS_INTEGER(literal));
			break;

		case LITERAL_FLOAT:
			printf("%g\n", AS_FLOAT(literal));
			break;

		case LITERAL_STRING:
			//the text is printed by explicit length, followed by that length in parentheses
			printf("%.*s (%d)\n", STRLEN(literal), AS_STRING(literal), STRLEN(literal));
			break;

		case LITERAL_FUNCTION:
			printf("<toy function>\n");
			break;

		default:
			//should never be seen
			//the message now ends in a newline so later output does not run onto the same line
			fprintf(stderr, "[Internal] Unrecognized literal type: %d\n", (int)literal.type);
			break;
	}
}
//release the heap memory held by a literal
//only string literals are freed here; every other type is left untouched
void freeLiteral(Literal literal) {
	if (!IS_STRING(literal)) {
		return;
	}

	FREE(char, AS_STRING(literal));
}
//decide whether a literal counts as "true"
//NOTE(review): null is reported as truthy, while strings (and every other type) are falsy - confirm this is intended
bool _isTruthy(Literal x) {
	if (IS_NULL(x)) {
		return true;
	}

	if (IS_BOOLEAN(x)) {
		return AS_BOOLEAN(x);
	}

	//numbers are truthy when they are non-zero
	if (IS_INTEGER(x)) {
		return AS_INTEGER(x) != 0;
	}

	if (IS_FLOAT(x)) {
		return AS_FLOAT(x) != 0;
	}

	return false;
}
Literal _toStringLiteral(char* cstr) {
return ((Literal){LITERAL_STRING, { .string.ptr = (char*)cstr, .string.length = strlen((char*)cstr) }});
}
//make a heap-allocated, null-terminated copy of the first `length` characters of `original`
//original: the source characters; does not need to be null-terminated
//length: the number of characters to copy
//returns: a new buffer of length + 1 bytes, allocated with ALLOCATE
char* copyString(char* original, int length) {
	char* buffer = ALLOCATE(char, length + 1);

	strncpy(buffer, original, length);

	//strncpy does not terminate the buffer when the source is at least `length` long,
	//and the result is handed to strlen() by _toStringLiteral, so terminate explicitly
	buffer[length] = '\0';

	return buffer;
}

74
source/literal.h Normal file
View File

@@ -0,0 +1,74 @@
#pragma once
#include "common.h"
#include <string.h>
//the tag stored in Literal.type, identifying which kind of value a literal holds
//NOTE(review): array, dictionary and function have no storage in the Literal union in this view
typedef enum {
LITERAL_NULL,
LITERAL_BOOLEAN,
LITERAL_INTEGER,
LITERAL_FLOAT,
LITERAL_STRING,
LITERAL_ARRAY,
LITERAL_DICTIONARY,
LITERAL_FUNCTION,
} LiteralType;
//a tagged value: `type` says which member of the `as` union is meaningful
//use the IS_*, AS_* and TO_*_LITERAL macros below rather than touching the members directly
typedef struct {
LiteralType type;
union {
bool boolean;
int integer;
float number;
//string data: a character pointer plus an explicit length
struct {
char* ptr;
int length; //could possibly cut it down further by removing this
} string;
// //experimental
// void* array;
// void* dictionary;
// void* function;
} as;
} Literal;
#define IS_NULL(value) ((value).type == LITERAL_NULL)
#define IS_BOOLEAN(value) ((value).type == LITERAL_BOOLEAN)
#define IS_INTEGER(value) ((value).type == LITERAL_INTEGER)
#define IS_FLOAT(value) ((value).type == LITERAL_FLOAT)
#define IS_STRING(value) ((value).type == LITERAL_STRING)
#define IS_ARRAY(value) ((value).type == LITERAL_ARRAY)
#define IS_DICTIONARY(value) ((value).type == LITERAL_DICTIONARY)
#define IS_FUNCTION(value) ((value).type == LITERAL_FUNCTION)
#define AS_BOOLEAN(value) ((value).as.boolean)
#define AS_INTEGER(value) ((value).as.integer)
#define AS_FLOAT(value) ((value).as.number)
#define AS_STRING(value) ((value).as.string.ptr)
// #define AS_ARRAY_PTR(value)
// #define AS_DICTIONARY_PTR(value)
// #define AS_FUNCTION_PTR(value) ((Function*)((value).as.function))
#define TO_NULL_LITERAL ((Literal){LITERAL_NULL, { .integer = 0 }})
#define TO_BOOLEAN_LITERAL(value) ((Literal){LITERAL_BOOLEAN, { .boolean = value }})
#define TO_INTEGER_LITERAL(value) ((Literal){LITERAL_INTEGER, { .integer = value }})
#define TO_FLOAT_LITERAL(value) ((Literal){LITERAL_FLOAT, { .number = value }})
#define TO_STRING_LITERAL(value) _toStringLiteral(value)
// #define TO_ARRAY_PTR
// #define TO_DICTIONARY_PTR
// #define TO_FUNCTION_PTR(value) ((Literal){LITERAL_FUNCTION, { .function = (Function*)value }})
void printLiteral(Literal literal);
void freeLiteral(Literal literal);
#define IS_TRUTHY(x) _isTruthy(x)
#define STRLEN(lit) ((lit).as.string.length)
//BUGFIX: macros are not functions
bool _isTruthy(Literal x);
Literal _toStringLiteral(char* cstr);
//utils
char* copyString(char* original, int length);

22
source/memory.c Normal file
View File

@@ -0,0 +1,22 @@
#include "memory.h"
#include <stdio.h>
#include <stdlib.h>
//the single entry point for heap memory: allocates, resizes, or frees a block
//pointer: the existing block, or NULL to allocate a fresh one
//oldSize: the previous size in bytes; only used in the failure message
//newSize: the requested size in bytes; 0 frees the block
//returns: the (possibly moved) block, or NULL when newSize is 0
//on allocation failure a message is written to stderr and the process exits with -1
void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
	if (newSize == 0) {
		free(pointer);
		return NULL;
	}

	void* mem = realloc(pointer, newSize);

	if (mem == NULL) {
		//%zu and %p report the real values; casting to int truncated them where int is narrower
		fprintf(stderr, "[Internal]Memory allocation error (requested %zu for %p, replacing %zu)\n", newSize, pointer, oldSize);
		exit(-1);
	}

	return mem;
}

12
source/memory.h Normal file
View File

@@ -0,0 +1,12 @@
#pragma once
#include "common.h"
//allocation helpers; every one of them funnels through reallocate()
//each expansion is fully parenthesized so it behaves as a single expression wherever it is used

//allocate room for `count` objects of `type`
#define ALLOCATE(type, count) ((type*)reallocate(NULL, 0, sizeof(type) * (count)))

//free a single object of `type`
#define FREE(type, pointer) (reallocate(pointer, sizeof(type), 0))

//the next capacity for a growing array: at least 8, otherwise double
#define GROW_CAPACITY(capacity) ((capacity) < 8 ? 8 : (capacity) * 2)

//resize an array of `type` from `oldCount` to `count` elements
#define GROW_ARRAY(type, pointer, oldCount, count) ((type*)reallocate(pointer, sizeof(type) * (oldCount), sizeof(type) * (count)))

//free an array of `oldCount` objects of `type`
#define FREE_ARRAY(type, pointer, oldCount) (reallocate(pointer, sizeof(type) * (oldCount), 0))

void* reallocate(void* pointer, size_t oldSize, size_t newSize);

53
source/node.c Normal file
View File

@@ -0,0 +1,53 @@
#include "node.h"
#include "memory.h"
#include <stdio.h>
//recursively release a node, its children, and any literal it holds
//node: the root of the tree to free; NULL is accepted and ignored, like free()
void freeNode(Node* node) {
	//a missing child (or an empty tree) has nothing to release
	if (node == NULL) {
		return;
	}

	switch(node->type) {
		case NODE_ATOMIC:
			freeLiteral(node->atomic.literal);
			break;

		case NODE_UNARY:
			freeNode(node->unary.child);
			break;

		case NODE_BINARY:
			freeNode(node->binary.left);
			freeNode(node->binary.right);
			break;

		default:
			//unknown node types own nothing beyond the node itself
			break;
	}

	FREE(Node, node);
}
//create a new atomic node holding `literal` and store it in *nodeHandle
//whatever *nodeHandle pointed at before is overwritten, not freed
void emitAtomicLiteral(Node** nodeHandle, Literal literal) {
	Node* fresh = ALLOCATE(Node, 1);

	fresh->type = NODE_ATOMIC;
	fresh->atomic.literal = literal;

	*nodeHandle = fresh;
}
//dump a node tree to stdout, labelling each node with its kind
//node types other than atomic, unary and binary print nothing
void printNode(Node* node) {
	const NodeType kind = node->type;

	if (kind == NODE_ATOMIC) {
		printf("atomic:");
		printLiteral(node->atomic.literal);
		return;
	}

	if (kind == NODE_UNARY) {
		printf("unary:");
		printNode(node->unary.child);
		return;
	}

	if (kind == NODE_BINARY) {
		printf("binary left:");
		printNode(node->binary.left);
		printf("binary right:");
		printNode(node->binary.right);
	}
}

43
source/node.h Normal file
View File

@@ -0,0 +1,43 @@
#pragma once
#include "opcodes.h"
#include "literal.h"
//nodes are the intermediaries between parsers and compilers
//Node is a union of the node structs below; declared up front so the structs can point at it
typedef union _node Node;
//the tag identifying which member of the Node union is in use
typedef enum NodeType {
NODE_ATOMIC, //a simple value
NODE_UNARY, //one child
NODE_BINARY, //two children, left and right
// NODE_GROUPING,
} NodeType;
//a leaf node carrying a single literal value
typedef struct NodeAtomic {
NodeType type;
Literal literal;
} NodeAtomic;
//a node with exactly one child
typedef struct NodeUnary {
NodeType type;
Node* child;
} NodeUnary;
//a node with a left and a right child
typedef struct NodeBinary {
NodeType type;
Node* left;
Node* right;
} NodeBinary;
//every member begins with a NodeType, so `type` can be read before choosing which member to use
union _node {
NodeType type;
NodeAtomic atomic;
NodeUnary unary;
NodeBinary binary;
};
void freeNode(Node* node);
void emitAtomicLiteral(Node** nodeHandle, Literal literal);
void printNode(Node* node);

14
source/opcodes.h Normal file
View File

@@ -0,0 +1,14 @@
#pragma once
//the opcodes declared so far; nothing visible in this change consumes them yet
typedef enum Opcode {
OP_EOF,
//basic operations
OP_PRINT,
//data
OP_LITERAL,
//TODO: add more
} Opcode;

349
source/parser.c Normal file
View File

@@ -0,0 +1,349 @@
#include "parser.h"
#include "common.h"
#include "memory.h"
#include "literal.h"
#include <stdio.h>
//utility functions
//report a parse error at `token` on stderr, then flag the parser as errored and panicking
//while the parser is already panicking, further errors are silently dropped
static void error(Parser* parser, Token token, const char* message) {
	//keep going while panicking
	if (parser->panic) {
		return;
	}

	fprintf(stderr, "[Line %d] Error", token.line);

	//say where: either the offending lexeme, or the end of the source
	if (token.type != TOKEN_EOF) {
		fprintf(stderr, " at '%.*s'", token.length, token.lexeme);
	}
	else {
		fprintf(stderr, " at end");
	}

	fprintf(stderr, ": %s\n", message);

	parser->panic = true;
	parser->error = true;
}
//shift the token window forward: current becomes previous, and a new token is read from the lexer
//a TOKEN_ERROR from the lexer is reported through error()
static void advance(Parser* parser) {
	parser->previous = parser->current;

	const Token next = scanLexer(parser->lexer);
	parser->current = next;

	if (next.type == TOKEN_ERROR) {
		error(parser, next, "Lexer error");
	}
}
//consume the current token only if it has the given type
//returns true when a token was consumed, false when the parser was left untouched
static bool match(Parser* parser, TokenType tokenType) {
	if (parser->current.type != tokenType) {
		return false;
	}

	advance(parser);
	return true;
}
//require the current token to have the given type
//on a match the token is consumed; otherwise `msg` is reported at the current token and nothing is consumed
static void consume(Parser* parser, TokenType tokenType, const char* msg) {
	if (parser->current.type == tokenType) {
		advance(parser);
	}
	else {
		error(parser, parser->current, msg);
	}
}
//error recovery: discard tokens until one that can begin a statement is reached
//the panic flag is cleared only when such a token is found; hitting EOF leaves it set
static void synchronize(Parser* parser) {
	for (;;) {
		switch(parser->current.type) {
			case TOKEN_EOF:
				//ran out of input while still panicking
				return;

			//these tokens can start a line
			case TOKEN_ASSERT:
			case TOKEN_BREAK:
			case TOKEN_CONST:
			case TOKEN_CONTINUE:
			case TOKEN_DO:
			case TOKEN_EXPORT:
			case TOKEN_FOR:
			case TOKEN_FOREACH:
			case TOKEN_IF:
			case TOKEN_IMPORT:
			case TOKEN_PRINT:
			case TOKEN_RETURN:
			case TOKEN_VAR:
			case TOKEN_WHILE:
				parser->panic = false;
				return;

			default:
				//not a statement boundary, so skip it
				advance(parser);
				break;
		}
	}
}
//the pratt table collates the precedence rules
//precedence levels, ordered from lowest (PREC_NONE) to highest (PREC_PRIMARY)
//parsePrecedence compares these numerically, so the declaration order matters
typedef enum {
PREC_NONE,
PREC_ASSIGNMENT,
PREC_TERNARY,
PREC_OR,
PREC_AND,
PREC_EQUALITY,
PREC_COMPARISON,
PREC_TERM,
PREC_FACTOR,
PREC_UNARY,
PREC_CALL,
PREC_PRIMARY,
} PrecedenceRule;
//signature shared by every prefix and infix rule; the result is written through nodeHandle
typedef void (*ParseFn)(Parser* parser, Node** nodeHandle, bool canBeAssigned);
//one row of the pratt table: how a token behaves in prefix position, in infix position, and how tightly it binds
typedef struct {
ParseFn prefix;
ParseFn infix;
PrecedenceRule precedence;
} ParseRule;
//tentative declaration; the table itself is defined after the rule functions it refers to
ParseRule parseRules[];
//forward declarations
static void parsePrecedence(Parser* parser, Node** nodeHandle, PrecedenceRule rule);
//the atomic expression rules
//prefix rule for string literals: copies the lexeme of the previous token into a new atomic node
//any other token type reaching this rule is reported as an error
static void string(Parser* parser, Node** nodeHandle, bool canBeAssigned) {
	if (parser->previous.type != TOKEN_LITERAL_STRING) {
		error(parser, parser->previous, "Unexpected token passed to string precedence rule");
		return;
	}

	//TODO: interpolated strings

	//the node gets its own copy of the text rather than pointing at the token's lexeme
	char* text = copyString(parser->previous.lexeme, parser->previous.length);
	emitAtomicLiteral(nodeHandle, TO_STRING_LITERAL(text));
}
//placeholder for the binary-operator rule; currently does nothing and is not referenced by the rule table
static void binary(Parser* parser, Node** nodeHandle, bool canBeAssigned) {
//TODO
}
//placeholder for the unary-operator rule; currently does nothing and is not referenced by the rule table
static void unary(Parser* parser, Node** nodeHandle, bool canBeAssigned) {
//TODO
}
//prefix rule for the keyword literals: null, true and false
//emits the matching atomic node for the previous token, or reports an error for anything else
static void atomic(Parser* parser, Node** nodeHandle, bool canBeAssigned) {
	const TokenType kind = parser->previous.type;

	if (kind == TOKEN_NULL) {
		emitAtomicLiteral(nodeHandle, TO_NULL_LITERAL);
	}
	else if (kind == TOKEN_LITERAL_TRUE) {
		emitAtomicLiteral(nodeHandle, TO_BOOLEAN_LITERAL(true));
	}
	else if (kind == TOKEN_LITERAL_FALSE) {
		emitAtomicLiteral(nodeHandle, TO_BOOLEAN_LITERAL(false));
	}
	else {
		error(parser, parser->previous, "Unexpected token passed to atomic precedence rule");
	}
}
//the pratt table: one {prefix, infix, precedence} row per token type, looked up by getRule() using the token type as the index
//rows must stay in the same order as the TokenType enum; the trailing comment on each row names the token it is meant for
//so far only null, true, false and string literals have a prefix rule, and no token has an infix rule
ParseRule parseRules[] = { //must match the token types
//types
{atomic, NULL, PREC_NONE},// TOKEN_NULL,
{NULL, NULL, PREC_NONE},// TOKEN_BOOLEAN,
{NULL, NULL, PREC_NONE},// TOKEN_INTEGER,
{NULL, NULL, PREC_NONE},// TOKEN_FLOAT,
{NULL, NULL, PREC_NONE},// TOKEN_STRING,
{NULL, NULL, PREC_NONE},// TOKEN_ARRAY,
{NULL, NULL, PREC_NONE},// TOKEN_DICTIONARY,
{NULL, NULL, PREC_NONE},// TOKEN_FUNCTION,
{NULL, NULL, PREC_NONE},// TOKEN_ANY,
//keywords and reserved words
{NULL, NULL, PREC_NONE},// TOKEN_AS,
{NULL, NULL, PREC_NONE},// TOKEN_ASSERT,
{NULL, NULL, PREC_NONE},// TOKEN_BREAK,
{NULL, NULL, PREC_NONE},// TOKEN_CLASS,
{NULL, NULL, PREC_NONE},// TOKEN_CONST,
{NULL, NULL, PREC_NONE},// TOKEN_CONTINUE,
{NULL, NULL, PREC_NONE},// TOKEN_DO,
{NULL, NULL, PREC_NONE},// TOKEN_ELSE,
{NULL, NULL, PREC_NONE},// TOKEN_EXPORT,
{NULL, NULL, PREC_NONE},// TOKEN_FOR,
{NULL, NULL, PREC_NONE},// TOKEN_FOREACH,
{NULL, NULL, PREC_NONE},// TOKEN_IF,
{NULL, NULL, PREC_NONE},// TOKEN_IMPORT,
{NULL, NULL, PREC_NONE},// TOKEN_IN,
{NULL, NULL, PREC_NONE},// TOKEN_OF,
{NULL, NULL, PREC_NONE},// TOKEN_PRINT,
{NULL, NULL, PREC_NONE},// TOKEN_RETURN,
{NULL, NULL, PREC_NONE},// TOKEN_USING,
{NULL, NULL, PREC_NONE},// TOKEN_VAR,
{NULL, NULL, PREC_NONE},// TOKEN_WHILE,
//literal values
{NULL, NULL, PREC_NONE},// TOKEN_IDENTIFIER,
{atomic, NULL, PREC_NONE},// TOKEN_LITERAL_TRUE,
{atomic, NULL, PREC_NONE},// TOKEN_LITERAL_FALSE,
{NULL, NULL, PREC_NONE},// TOKEN_LITERAL_INTEGER,
{NULL, NULL, PREC_NONE},// TOKEN_LITERAL_FLOAT,
{string, NULL, PREC_PRIMARY},// TOKEN_LITERAL_STRING,
//math operators
{NULL, NULL, PREC_NONE},// TOKEN_PLUS,
{NULL, NULL, PREC_NONE},// TOKEN_MINUS,
{NULL, NULL, PREC_NONE},// TOKEN_MULTIPLY,
{NULL, NULL, PREC_NONE},// TOKEN_DIVIDE,
{NULL, NULL, PREC_NONE},// TOKEN_MODULO,
{NULL, NULL, PREC_NONE},// TOKEN_PLUS_ASSIGN,
{NULL, NULL, PREC_NONE},// TOKEN_MINUS_ASSIGN,
{NULL, NULL, PREC_NONE},// TOKEN_MULTIPLY_ASSIGN,
{NULL, NULL, PREC_NONE},// TOKEN_DIVIDE_ASSIGN,
{NULL, NULL, PREC_NONE},// TOKEN_MODULO_ASSIGN,
{NULL, NULL, PREC_NONE},// TOKEN_PLUS_PLUS,
{NULL, NULL, PREC_NONE},// TOKEN_MINUS_MINUS,
//logical operators
{NULL, NULL, PREC_NONE},// TOKEN_PAREN_LEFT,
{NULL, NULL, PREC_NONE},// TOKEN_PAREN_RIGHT,
{NULL, NULL, PREC_NONE},// TOKEN_BRACKET_LEFT,
{NULL, NULL, PREC_NONE},// TOKEN_BRACKET_RIGHT,
{NULL, NULL, PREC_NONE},// TOKEN_BRACE_LEFT,
{NULL, NULL, PREC_NONE},// TOKEN_BRACE_RIGHT,
{NULL, NULL, PREC_NONE},// TOKEN_NOT,
{NULL, NULL, PREC_NONE},// TOKEN_NOT_EQUAL,
{NULL, NULL, PREC_NONE},// TOKEN_EQUAL,
{NULL, NULL, PREC_NONE},// TOKEN_LESS,
{NULL, NULL, PREC_NONE},// TOKEN_GREATER,
{NULL, NULL, PREC_NONE},// TOKEN_LESS_EQUAL,
{NULL, NULL, PREC_NONE},// TOKEN_GREATER_EQUAL,
{NULL, NULL, PREC_NONE},// TOKEN_AND,
{NULL, NULL, PREC_NONE},// TOKEN_OR,
//other operators
{NULL, NULL, PREC_NONE},// TOKEN_ASSIGN,
{NULL, NULL, PREC_NONE},// TOKEN_COLON,
{NULL, NULL, PREC_NONE},// TOKEN_SEMICOLON,
{NULL, NULL, PREC_NONE},// TOKEN_COMMA,
{NULL, NULL, PREC_NONE},// TOKEN_DOT,
{NULL, NULL, PREC_NONE},// TOKEN_PIPE,
{NULL, NULL, PREC_NONE},// TOKEN_REST,
//meta tokens
{NULL, NULL, PREC_NONE},// TOKEN_PASS,
{NULL, NULL, PREC_NONE},// TOKEN_ERROR,
{NULL, NULL, PREC_NONE},// TOKEN_EOF,
};
//look up the pratt table row for a token type
//the type is used directly as the index, with no bounds check
ParseRule* getRule(TokenType type) {
	ParseRule* row = parseRules + type;
	return row;
}
//the core of the pratt parser: parse an expression whose operators bind at least as tightly as `rule`
//the resulting node is written through nodeHandle by the prefix/infix rules
static void parsePrecedence(Parser* parser, Node** nodeHandle, PrecedenceRule rule) {
	//every expression begins with a token that has a prefix rule
	advance(parser);
	ParseRule* head = getRule(parser->previous.type);

	if (head->prefix == NULL) {
		error(parser, parser->previous, "Expected expression");
		return;
	}

	const bool canBeAssigned = (rule <= PREC_ASSIGNMENT);
	head->prefix(parser, nodeHandle, canBeAssigned);

	//keep folding in infix operators while the upcoming token binds tightly enough
	for (;;) {
		ParseRule* next = getRule(parser->current.type);

		if (rule > next->precedence) {
			break;
		}

		if (next->infix == NULL) {
			error(parser, parser->current, "Expected operator");
			return;
		}

		next->infix(parser, nodeHandle, canBeAssigned); //NOTE: infix rule must advance the parser
	}

	//an '=' left over at this point has nothing valid to assign to
	if (canBeAssigned && match(parser, TOKEN_ASSIGN)) {
		error(parser, parser->current, "Invalid assignment target");
	}
}
//expressions
//parse a full expression into *nodeHandle
//starts the pratt parser at the lowest binding level that still allows assignment
static void expression(Parser* parser, Node** nodeHandle) {
	const PrecedenceRule lowest = PREC_ASSIGNMENT;
	parsePrecedence(parser, nodeHandle, lowest);
}
//statements
//parse the remainder of a print statement (the `print` keyword has already been consumed)
//node: the caller-allocated node to fill in; it becomes a unary node whose child is the printed expression
static void printStmt(Parser* parser, Node* node) {
	//set the node info
	node->type = NODE_UNARY;

	//the expression rules allocate the child themselves; pre-allocating one here would leak it
	node->unary.child = NULL;
	expression(parser, &(node->unary.child));

	//if the expression failed to parse, no child was emitted; substitute a null literal
	//so the tree is always fully initialized for printNode() and freeNode()
	if (node->unary.child == NULL) {
		emitAtomicLiteral(&(node->unary.child), TO_NULL_LITERAL);
	}

	consume(parser, TOKEN_SEMICOLON, "Expected ';' at end of print statement");
}
//precedence functions
//placeholder for expression statements: always reports an error
//node: the caller-allocated node; it is filled with a null literal so it is safe to print and free
static void expressionStmt(Parser* parser, Node* node) {
	//consume the offending token first: this guarantees forward progress (otherwise a token
	//that synchronize() stops on, such as `var`, would be re-parsed forever), and it makes
	//parser->previous a real token to report against, even for the first statement
	advance(parser);
	error(parser, parser->previous, "Expression statements not yet implemented");

	//leave the node in a defined state for the caller
	node->type = NODE_ATOMIC;
	node->atomic.literal = TO_NULL_LITERAL;
}
//parse one statement into `node`, choosing the grammar rule from the leading token
static void statement(Parser* parser, Node* node) {
	//anything that isn't a recognized keyword falls back to an expression statement
	if (!match(parser, TOKEN_PRINT)) {
		expressionStmt(parser, node);
		return;
	}

	//print
	printStmt(parser, node);
}
//parse one declaration into `node`; currently that is always a statement
//if parsing left the parser panicking, skip ahead to the next statement boundary
static void declaration(Parser* parser, Node* node) {
	statement(parser, node);

	if (!parser->panic) {
		return;
	}

	synchronize(parser);
}
//exposed functions
//bind a parser to a lexer, clear its flags, and read the first token
//only the `type` of each token slot is reset here; the other token fields are left as they were
void initParser(Parser* parser, Lexer* lexer) {
	parser->current.type = TOKEN_NULL;
	parser->previous.type = TOKEN_NULL;

	parser->panic = false;
	parser->error = false;

	parser->lexer = lexer;

	//prime the parser so `current` holds the first real token
	advance(parser);
}
//detach a parser from its lexer and reset its state
//the parser owns no heap memory, so nothing is actually freed
void freeParser(Parser* parser) {
	//reset the fields directly: going through initParser() would call advance(),
	//which would try to scan a token from the NULL lexer
	parser->lexer = NULL;
	parser->error = false;
	parser->panic = false;

	parser->previous.type = TOKEN_NULL;
	parser->current.type = TOKEN_NULL;
}
//parse the next top-level declaration from the source
//returns: a heap-allocated node (release it with freeNode), or NULL once the end of the source is reached
Node* scanParser(Parser* parser) {
	//nothing left to parse
	const bool atEnd = match(parser, TOKEN_EOF);
	if (atEnd) {
		return NULL;
	}

	//the grammar rules fill in a node that lives on the heap
	Node* result = ALLOCATE(Node, 1);
	declaration(parser, result);

	return result;
}

21
source/parser.h Normal file
View File

@@ -0,0 +1,21 @@
#pragma once
#include "parser.h"
#include "lexer.h"
#include "node.h"
//DOCS: parsers are bound to a lexer, and turn the outputted tokens into AST nodes
//parser state: the lexer being read from, the error flags, and a two-token window
typedef struct {
Lexer* lexer;
bool error; //I've had an error
bool panic; //I am processing an error
//track the last two outputs from the lexer
Token current;
Token previous;
} Parser;
void initParser(Parser* parser, Lexer* lexer);
void freeParser(Parser* parser);
Node* scanParser(Parser* parser);

View File

@@ -1,7 +1,7 @@
#include "debug.h" #include "debug.h"
#include "lexer.h" #include "lexer.h"
//-#include "parser.h" #include "parser.h"
//#include "toy.h" //#include "toy.h"
#include <stdio.h> #include <stdio.h>
@@ -130,16 +130,22 @@ void repl() {
void debug() { void debug() {
Lexer lexer; Lexer lexer;
Token token; Parser parser;
char* source = readFile(command.filename); char* source = readFile(command.filename);
initLexer(&lexer, source); initLexer(&lexer, source);
initParser(&parser, &lexer);
//run the lexer until the end of the source //run the parser until the end of the source
do { Node* node = scanParser(&parser);
token = scanLexer(&lexer); while(node != NULL) {
} while(token.type != TOKEN_EOF); printNode(node);
freeNode(node);
node = scanParser(&parser);
}
} }
//entry point //entry point

View File

@@ -36,7 +36,6 @@ typedef enum TokenType {
//literal values //literal values
TOKEN_IDENTIFIER, TOKEN_IDENTIFIER,
TOKEN_LITERAL_NULL,
TOKEN_LITERAL_TRUE, TOKEN_LITERAL_TRUE,
TOKEN_LITERAL_FALSE, TOKEN_LITERAL_FALSE,
TOKEN_LITERAL_INTEGER, TOKEN_LITERAL_INTEGER,