Wrote a basic lexer

This commit is contained in:
2022-08-03 09:35:20 +01:00
parent 3cbf7b13eb
commit 3cad70dddd
12 changed files with 884 additions and 0 deletions

34
makefile Normal file
View File

@@ -0,0 +1,34 @@
# Top-level build: create the shared output directory, then delegate to source/.
# OUTDIR is exported so the sub-make can place the binary under it.
export OUTDIR = out

# all/clean/rebuild are commands, not files - declare them phony so a stray
# file with the same name can't silently disable them
.PHONY: all clean rebuild

all: $(OUTDIR)
	$(MAKE) -C source

$(OUTDIR):
	mkdir -p $(OUTDIR)

# Artefact sweep shared by the unix-like platforms below; the old per-platform
# copies were byte-identical duplicates. (-type f makes rm's -r redundant.)
FIND_CLEAN = find . -type f \( -name '*.o' -o -name '*.a' -o -name '*.exe' -o -name '*.dll' -o -name '*.lib' -o -name '*.so' \) -exec rm -f -v {} \; && find . -empty -type d -delete

clean:
ifeq ($(findstring CYGWIN, $(shell uname)),CYGWIN)
	$(FIND_CLEAN)
else ifeq ($(shell uname), Linux)
	$(FIND_CLEAN)
else ifeq ($(OS),Windows_NT)
	$(RM) *.o *.a *.exe
else
	@echo "Deletion failed - what platform is this?"
endif

# NOTE: under `make -j`, clean and all may overlap; invoke sequentially when rebuilding
rebuild: clean all

1
scripts/test.toy Normal file
View File

@@ -0,0 +1 @@
print "Hello world";

11
source/common.h Normal file
View File

@@ -0,0 +1,11 @@
#pragma once

//shared includes and version information for the whole interpreter
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

//semantic version of this Toy implementation
#define TOY_VERSION_MAJOR 0
#define TOY_VERSION_MINOR 6
#define TOY_VERSION_PATCH 0

//build stamp: __DATE__ is the compile date of the including translation unit
#define TOY_VERSION_BUILD __DATE__

103
source/debug.c Normal file
View File

@@ -0,0 +1,103 @@
#include "debug.h"
#include "keyword_types.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//print a single token to stdout, for debugging the lexer's output
//error tokens print their message, literal/identifier tokens print their
//lexeme, everything else is looked up in the keyword table
void printToken(Token* token) {
	if (token->type == TOKEN_ERROR) {
		//lexeme is not null-terminated, so print exactly `length` characters
		printf("Error\t%d\t%.*s\n", token->line, token->length, token->lexeme);
		return;
	}

	printf("\t%d\t%d\t", token->type, token->line);

	if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_LITERAL_INTEGER || token->type == TOKEN_LITERAL_FLOAT || token->type == TOKEN_LITERAL_STRING) {
		printf("%.*s\t", token->length, token->lexeme);
	} else {
		char* keyword = findKeywordByType(token->type);

		if (keyword != NULL) {
			printf("%s", keyword);
		} else {
			//no printable representation for this token type
			printf("-");
		}
	}

	printf("\n");
}
//the command-line singleton, populated once at startup
Command command;

//parse argv into the command singleton; an unrecognised argument (or no
//arguments at all) sets the error flag
void initCommand(int argc, const char* argv[]) {
	//begin from a clean slate
	command.error = false;
	command.help = false;
	command.version = false;
	command.filename = NULL;
	command.source = NULL;
	command.verbose = false;

	//argv[0] is the program name, so begin at index 1
	int i = 1;
	while (i < argc) {
		const char* arg = argv[i];

		if (strcmp(arg, "-h") == 0 || strcmp(arg, "--help") == 0) {
			command.help = true;
		}
		else if (strcmp(arg, "-v") == 0 || strcmp(arg, "--version") == 0) {
			command.version = true;
		}
		else if ((strcmp(arg, "-f") == 0 || strcmp(arg, "--file") == 0) && i + 1 < argc) {
			//the flag's value is the following argument
			command.filename = (char*)argv[++i];
		}
		else if ((strcmp(arg, "-i") == 0 || strcmp(arg, "--input") == 0) && i + 1 < argc) {
			command.source = (char*)argv[++i];
		}
		else if (strcmp(arg, "-d") == 0 || strcmp(arg, "--debug") == 0) {
			command.verbose = true;
		}
		else {
			command.error = true;
		}

		i++;
	}

	//calling with no arguments is also an error
	if (argc == 1) {
		command.error = true;
	}
}
//print a one-line usage summary; only argv[0] (the program name) is used
void usageCommand(int argc, const char* argv[]) {
	(void)argc; //present for signature symmetry with the other *Command functions
	printf("Usage: %s [-h | -v | [-d][-f filename | -i source]]\n\n", argv[0]);
}
//print the usage line followed by a description of each flag
void helpCommand(int argc, const char* argv[]) {
	usageCommand(argc, argv);
	printf("-h | --help\t\tShow this help then exit.\n");
	printf("-v | --version\t\tShow version and copyright information then exit.\n");
	printf("-f | --file filename\tParse and execute the source file.\n");
	printf("-i | --input source\tParse and execute this given string of source code.\n");
	printf("-d | --debug\t\tBe verbose when operating.\n");
}
//print the interpreter version (from common.h) and the zlib-style license text
void copyrightCommand(int argc, const char* argv[]) {
	printf("Toy Programming Language Interpreter Version %d.%d.%d (built on %s)\n\n", TOY_VERSION_MAJOR, TOY_VERSION_MINOR, TOY_VERSION_PATCH, TOY_VERSION_BUILD);
	printf("Copyright (c) 2020-2022 Kayne Ruse, KR Game Studios\n\n");
	printf("This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.\n\n");
	printf("Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:\n\n");
	printf("1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n\n");
	printf("2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n\n");
	printf("3. This notice may not be removed or altered from any source distribution.\n\n");
}

24
source/debug.h Normal file
View File

@@ -0,0 +1,24 @@
#pragma once

#include "common.h"
#include "lexer.h"

//print a human-readable dump of a single token to stdout
void printToken(Token* token);

//for processing the command line arguments
typedef struct {
	bool error; //set when the arguments could not be parsed
	bool help; //-h | --help
	bool version; //-v | --version
	char* filename; //-f | --file; points into argv, not owned
	char* source; //-i | --input; points into argv, not owned
	bool verbose; //-d | --debug
} Command;

//the lone global instance, defined in debug.c
extern Command command;

void initCommand(int argc, const char* argv[]);
void usageCommand(int argc, const char* argv[]);
void helpCommand(int argc, const char* argv[]);
void copyrightCommand(int argc, const char* argv[]);

62
source/keyword_types.c Normal file
View File

@@ -0,0 +1,62 @@
#include "keyword_types.h"
#include "common.h"
//maps each reserved word's spelling to its token type; the table is
//terminated by the NULL-keyword sentinel entry (TOKEN_EOF), which loops
//over this array rely on
KeywordType keywordTypes[] = {
	//type keywords
	{TOKEN_NULL, "null"},
	{TOKEN_BOOLEAN, "bool"},
	{TOKEN_INTEGER, "int"},
	{TOKEN_FLOAT, "float"},
	{TOKEN_STRING, "string"},
	{TOKEN_ARRAY, "array"},
	{TOKEN_DICTIONARY, "dictionary"},
	{TOKEN_FUNCTION, "function"},
	{TOKEN_ANY, "any"},

	//other keywords
	{TOKEN_AS, "as"},
	{TOKEN_ASSERT, "assert"},
	{TOKEN_BREAK, "break"},
	{TOKEN_CLASS, "class"},
	{TOKEN_CONST, "const"},
	{TOKEN_CONTINUE, "continue"},
	{TOKEN_DO, "do"},
	{TOKEN_ELSE, "else"},
	{TOKEN_EXPORT, "export"},
	{TOKEN_FOR, "for"},
	{TOKEN_FOREACH, "foreach"},
	{TOKEN_IF, "if"},
	{TOKEN_IMPORT, "import"},
	{TOKEN_IN, "in"},
	{TOKEN_OF, "of"},
	{TOKEN_PRINT, "print"},
	{TOKEN_RETURN, "return"},
	{TOKEN_USING, "using"},
	{TOKEN_VAR, "var"},
	{TOKEN_WHILE, "while"},

	//literal values
	{TOKEN_LITERAL_TRUE, "true"},
	{TOKEN_LITERAL_FALSE, "false"},

	//meta tokens
	{TOKEN_PASS, "pass"},
	{TOKEN_ERROR, "error"},
	{TOKEN_EOF, NULL}, //sentinel: marks the end of the table
};
//reverse lookup: map a token type back to its keyword spelling, for debug
//output; returns NULL when the type has no keyword representation
char* findKeywordByType(TokenType type) {
	//TOKEN_EOF doubles as the table's NULL-keyword sentinel, so handle it here
	if (type == TOKEN_EOF) {
		return "EOF";
	}

	int i = 0;
	while (keywordTypes[i].keyword != NULL) {
		if (keywordTypes[i].type == type) {
			return keywordTypes[i].keyword;
		}
		i++;
	}

	return NULL;
}

13
source/keyword_types.h Normal file
View File

@@ -0,0 +1,13 @@
#pragma once

#include "token_types.h"

//one row of the keyword table: a token type and its source-code spelling
typedef struct {
	TokenType type;
	char* keyword; //NULL marks the end of the table
} KeywordType;

//defined in keyword_types.c; terminated by a NULL-keyword entry
extern KeywordType keywordTypes[];

//for debugging
char* findKeywordByType(TokenType type);

297
source/lexer.c Normal file
View File

@@ -0,0 +1,297 @@
#include "lexer.h"
#include "keyword_types.h"
#include "debug.h"
#include <stdio.h>
#include <string.h>
//static generic utility functions
//reset a lexer to a known-empty state (line counting starts at 1)
static void cleanLexer(Lexer* lexer) {
	lexer->source = NULL;
	lexer->start = 0;
	lexer->current = 0;
	lexer->line = 1;
}
//true when the cursor sits on the source's null terminator
static bool isAtEnd(Lexer* lexer) {
	return lexer->source[lexer->current] == '\0';
}
//look at the current character without consuming it
static char peek(Lexer* lexer) {
	return lexer->source[lexer->current];
}
//look one character ahead; returns '\0' rather than reading past the end
static char peekNext(Lexer* lexer) {
	if (isAtEnd(lexer)) return '\0';
	return lexer->source[lexer->current + 1];
}
//consume and return the current character ('\0' once past the end)
static char advance(Lexer* lexer) {
	if (isAtEnd(lexer)) {
		return '\0';
	}

	//count lines here so every consumer gets correct error positions
	if (lexer->source[lexer->current] == '\n') {
		lexer->line++;
	}

	lexer->current++;
	return lexer->source[lexer->current - 1];
}
//skip spaces, tabs, newlines and both comment styles before the next token;
//recurses (tail call) until a significant character is reached
static void eatWhitespace(Lexer* lexer) {
	const char c = peek(lexer);

	switch(c) {
		case ' ':
		case '\r':
		case '\n': //advance() bumps the line counter for us
		case '\t':
			advance(lexer);
			break;

		//comments
		case '/':
			//eat the rest of the line
			if (peekNext(lexer) == '/') {
				while (advance(lexer) != '\n' && !isAtEnd(lexer));
				break;
			}

			//eat the block comment
			if (peekNext(lexer) == '*') {
				advance(lexer);
				advance(lexer);

				//BUGFIX: stop at end-of-source, so an unterminated block
				//comment can't scan past the buffer
				while (!isAtEnd(lexer) && !(peek(lexer) == '*' && peekNext(lexer) == '/')) {
					advance(lexer);
				}

				//eat the closing */ (advance() is a no-op at end-of-source)
				advance(lexer);
				advance(lexer);
				break;
			}

			//a lone '/' is a real token - fall through and return
			/* fallthrough */
		default:
			return;
	}

	//tail recursion
	eatWhitespace(lexer);
}
//is the current character a decimal digit?
static bool isDigit(Lexer* lexer) {
	return peek(lexer) >= '0' && peek(lexer) <= '9';
}
//is the current character legal in an identifier (letter or underscore)?
static bool isAlpha(Lexer* lexer) {
	return
		(peek(lexer) >= 'A' && peek(lexer) <= 'Z') ||
		(peek(lexer) >= 'a' && peek(lexer) <= 'z') ||
		peek(lexer) == '_'
	;
}
//consume the current character only if it equals c; used for two-character
//operators such as += and ==
static bool match(Lexer* lexer, char c) {
	if (peek(lexer) == c) {
		advance(lexer);
		return true;
	}
	return false;
}
//token generators
//build an error token whose lexeme is a static message string
//(msg is not copied, so it must outlive the token)
static Token makeErrorToken(Lexer* lexer, char* msg) {
	Token token;

	token.type = TOKEN_ERROR;
	token.lexeme = msg;
	token.length = strlen(msg);
	token.line = lexer->line;

	if (command.verbose) {
		printf("err:");
		printToken(&token);
	}

	return token;
}
//build a token spanning everything consumed since lexer->start
//BUGFIX: the original hard-coded lexeme = &source[current - 1], length = 1,
//which truncated two-character operators (+=, ==, ++, ...) and read
//source[-1] for an EOF token on empty input
static Token makeToken(Lexer* lexer, TokenType type) {
	Token token;

	token.type = type;
	token.lexeme = &lexer->source[lexer->start];
	token.length = lexer->current - lexer->start; //0 for TOKEN_EOF
	token.line = lexer->line;

	if (command.verbose) {
		printf("tok:");
		printToken(&token);
	}

	return token;
}
//consume a numeric literal beginning at lexer->start; an embedded '.'
//upgrades the token from an integer literal to a float literal
static Token makeIntegerOrFloat(Lexer* lexer) {
	TokenType type = TOKEN_LITERAL_INTEGER; //what am I making?

	while(isDigit(lexer)) advance(lexer);

	//NOTE(review): a '.' with no digit after it (e.g. "1.") still becomes a
	//float literal - confirm this is the intended grammar
	if (peek(lexer) == '.') {
		type = TOKEN_LITERAL_FLOAT;
		advance(lexer);
		while(isDigit(lexer)) advance(lexer);
	}

	Token token;

	token.type = type;
	token.lexeme = &lexer->source[lexer->start];
	token.length = lexer->current - lexer->start;
	token.line = lexer->line;

	if (command.verbose) {
		if (type == TOKEN_LITERAL_INTEGER) {
			printf("int:");
		} else {
			printf("flt:");
		}
		printToken(&token);
	}

	return token;
}
//consume a string literal; `terminator` is the opening quote character
//the produced lexeme excludes the surrounding quotes
static Token makeString(Lexer* lexer, char terminator) {
	while (!isAtEnd(lexer) && peek(lexer) != terminator) {
		advance(lexer);
	}

	//BUGFIX: check for end-of-source BEFORE eating the terminator - the
	//original consumed it first, so a string ending exactly at the end of
	//the source was wrongly reported as unterminated
	if (isAtEnd(lexer)) {
		return makeErrorToken(lexer, "Unterminated string");
	}

	advance(lexer); //eat terminator

	Token token;

	token.type = TOKEN_LITERAL_STRING;
	token.lexeme = &lexer->source[lexer->start + 1]; //skip the opening quote
	token.length = lexer->current - lexer->start - 2; //drop both quotes
	token.line = lexer->line;

	if (command.verbose) {
		printf("str:");
		printToken(&token);
	}

	return token;
}
//consume a word; return the matching reserved-word token if the word is in
//the keyword table, otherwise an identifier token
static Token makeKeywordOrIdentifier(Lexer* lexer) {
	advance(lexer); //first letter can only be alpha

	while(isDigit(lexer) || isAlpha(lexer)) {
		advance(lexer);
	}

	//scan for a keyword (the table ends at its NULL-keyword sentinel)
	for (int i = 0; keywordTypes[i].keyword; i++) {
		//compare lengths first so e.g. "if" doesn't match the prefix of "iffy"
		if (strlen(keywordTypes[i].keyword) == (long unsigned int)(lexer->current - lexer->start) && !strncmp(keywordTypes[i].keyword, &lexer->source[lexer->start], lexer->current - lexer->start)) {
			Token token;

			token.type = keywordTypes[i].type;
			token.lexeme = &lexer->source[lexer->start];
			token.length = lexer->current - lexer->start;
			token.line = lexer->line;

			if (command.verbose) {
				printf("kwd:");
				printToken(&token);
			}

			return token;
		}
	}

	//return an identifier
	Token token;

	token.type = TOKEN_IDENTIFIER;
	token.lexeme = &lexer->source[lexer->start];
	token.length = lexer->current - lexer->start;
	token.line = lexer->line;

	if (command.verbose) {
		printf("idf:");
		printToken(&token);
	}

	return token;
}
//exposed functions
//bind the lexer to a new null-terminated source string, resetting all state
//(the lexer does not take ownership of the string)
void initLexer(Lexer* lexer, char* source) {
	cleanLexer(lexer);
	lexer->source = source;
}
//scan and return the next token from the bound source string;
//returns TOKEN_EOF once the input is exhausted
Token scanLexer(Lexer* lexer) {
	eatWhitespace(lexer);

	//every token starts where the whitespace ended
	lexer->start = lexer->current;

	if (isAtEnd(lexer)) return makeToken(lexer, TOKEN_EOF);

	if (isDigit(lexer)) return makeIntegerOrFloat(lexer);
	if (isAlpha(lexer)) return makeKeywordOrIdentifier(lexer);

	char c = advance(lexer);

	switch(c) {
		case '(': return makeToken(lexer, TOKEN_PAREN_LEFT);
		case ')': return makeToken(lexer, TOKEN_PAREN_RIGHT);
		case '{': return makeToken(lexer, TOKEN_BRACE_LEFT);
		case '}': return makeToken(lexer, TOKEN_BRACE_RIGHT);
		//"[]" collapses into the array type keyword token
		case '[': return makeToken(lexer, match(lexer, ']') ? TOKEN_ARRAY : TOKEN_BRACKET_LEFT);
		case ']': return makeToken(lexer, TOKEN_BRACKET_RIGHT);

		//two-character operators are resolved by match(), which only
		//consumes the second character on success
		case '+': return makeToken(lexer, match(lexer, '=') ? TOKEN_PLUS_ASSIGN : match(lexer, '+') ? TOKEN_PLUS_PLUS: TOKEN_PLUS);
		case '-': return makeToken(lexer, match(lexer, '=') ? TOKEN_MINUS_ASSIGN : match(lexer, '-') ? TOKEN_MINUS_MINUS: TOKEN_MINUS);
		case '*': return makeToken(lexer, match(lexer, '=') ? TOKEN_MULTIPLY_ASSIGN : TOKEN_MULTIPLY);
		case '/': return makeToken(lexer, match(lexer, '=') ? TOKEN_DIVIDE_ASSIGN : TOKEN_DIVIDE);
		case '%': return makeToken(lexer, match(lexer, '=') ? TOKEN_MODULO_ASSIGN : TOKEN_MODULO);
		case '!': return makeToken(lexer, match(lexer, '=') ? TOKEN_NOT_EQUAL : TOKEN_NOT);
		case '=': return makeToken(lexer, match(lexer, '=') ? TOKEN_EQUAL : TOKEN_ASSIGN);
		case '<': return makeToken(lexer, match(lexer, '=') ? TOKEN_LESS_EQUAL : TOKEN_LESS);
		case '>': return makeToken(lexer, match(lexer, '=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);

		case '&': //TOKEN_AND not used
			//NOTE(review): advance() consumes the next character even on the
			//error path, so the character after a lone '&' is lost - confirm
			if (advance(lexer) != '&') {
				return makeErrorToken(lexer, "Unexpected '&'");
			} else {
				return makeToken(lexer, TOKEN_AND);
			}

		case '|': return makeToken(lexer, match(lexer, '|') ? TOKEN_OR : TOKEN_PIPE);
		case ':': return makeToken(lexer, TOKEN_COLON);
		case ';': return makeToken(lexer, TOKEN_SEMICOLON);
		case ',': return makeToken(lexer, TOKEN_COMMA);

		case '.':
			//NOTE(review): this looks for ".," after the first '.' and never
			//consumes the lookahead - presumably TOKEN_REST is meant to be
			//"..." (peekNext == '.'); confirm against the parser's needs
			if (peek(lexer) == '.' && peekNext(lexer) == ',') {
				return makeToken(lexer, TOKEN_REST);
			}
			return makeToken(lexer, TOKEN_DOT);

		case '"':
			return makeString(lexer, c);

		//TODO: possibly support interpolated strings

		default:
			return makeErrorToken(lexer, "Unexpected token");
	}
}

24
source/lexer.h Normal file
View File

@@ -0,0 +1,24 @@
#pragma once

#include "common.h"
#include "token_types.h"

//lexers are bound to a string of code, and return a single token every time scan is called
typedef struct {
	char* source; //the code being scanned (not owned by the lexer)
	int start; //start of the token
	int current; //current position of the lexer
	int line; //track this for error handling
} Lexer;

//tokens are intermediaries between lexers and parsers
typedef struct {
	TokenType type;
	char* lexeme; //points into the lexer's source (or a static error message)
	int length; //lexeme is NOT null-terminated; always pair it with length
	int line;
} Token;

//bind the lexer to a null-terminated source string
void initLexer(Lexer* lexer, char* source);

//produce the next token; returns TOKEN_EOF at the end of the input
Token scanLexer(Lexer* lexer);

27
source/makefile Normal file
View File

@@ -0,0 +1,27 @@
# Builds every .c file in this directory into obj/ and links the interpreter
# into ../$(OUTDIR)/toy (OUTDIR is exported by the parent makefile).
CC=gcc
IDIR =.
CFLAGS=$(addprefix -I,$(IDIR)) -g -Wall -W -pedantic
LIBS=
ODIR=obj
SRC = $(wildcard *.c)
OBJ = $(addprefix $(ODIR)/,$(SRC:.c=.o))
OUT = ../$(OUTDIR)/toy

.PHONY: all clean

all: $(OBJ)
	$(CC) -o $(OUT) $^ $(CFLAGS) $(LIBS)

# order-only prerequisite: the obj dir must exist, but its timestamp
# must not trigger rebuilds
$(OBJ): | $(ODIR)

$(ODIR):
	mkdir -p $(ODIR)

$(ODIR)/%.o: %.c
	$(CC) -c -o $@ $< $(CFLAGS)

# BUGFIX: $(RM) is `rm -f`, which cannot remove a directory - the old
# `$(RM) $(ODIR)` silently left obj/ behind; also remove the linked binary
clean:
	$(RM) -r $(ODIR)
	$(RM) $(OUT)

197
source/repl_main.c Normal file
View File

@@ -0,0 +1,197 @@
#include "debug.h"
#include "lexer.h"
//-#include "parser.h"
//#include "toy.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//read a file and return it as a char array
char* readFile(char* path) {
FILE* file = fopen(path, "rb");
if (file == NULL) {
fprintf(stderr, "Could not open file \"%s\"\n", path);
exit(74);
}
fseek(file, 0L, SEEK_END);
size_t fileSize = ftell(file);
rewind(file);
char* buffer = (char*)malloc(fileSize + 1);
if (buffer == NULL) {
fprintf(stderr, "Not enough memory to read \"%s\"\n", path);
exit(74);
}
size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
if (bytesRead < fileSize) {
fprintf(stderr, "Could not read file \"%s\"\n", path);
exit(74);
}
fclose(file);
buffer[bytesRead] = '\0';
return buffer;
}
/*
//run functions
void runString(char* source) {
Lexer lexer;
Parser parser;
Toy toy;
initLexer(&lexer, source);
initParser(&parser, &lexer);
initToy(&toy);
Chunk* chunk = scanParser(&parser);
if (chunk->count > 1 && command.verbose) {
printChunk(chunk, " ");
}
executeChunk(&toy, chunk);
freeChunk(chunk);
freeToy(&toy);
freeParser(&parser);
}
void runFile(char* fname) {
char* source = readFile(fname);
runString(source);
free((void*)source);
}
void repl() {
const int size = 2048;
char input[size];
memset(input, 0, size);
Parser parser;
Toy toy;
initToy(&toy);
for(;;) {
printf(">");
fgets(input, size, stdin);
//setup
Lexer lexer;
initLexer(&lexer, input);
initParser(&parser, &lexer);
//run
Chunk* chunk = scanParser(&parser);
if (chunk->count > 1 && command.verbose) {
printChunk(chunk, " ");
}
//clean up the memory
if (parser.error) {
freeChunk(chunk);
freeParser(&parser);
continue;
}
executeChunk(&toy, chunk);
if (toy.panic) {
toy.panic = false;
freeChunk(chunk);
freeParser(&parser);
continue;
}
freeChunk(chunk);
//cleanup
freeParser(&parser);
}
freeToy(&toy);
}
*/
//lex the file named on the command line, token by token, until EOF; token
//printing happens inside scanLexer when the verbose flag is set
//(a stop-gap entry point until the parser exists)
void debug() {
	Lexer lexer;
	Token token;

	char* source = readFile(command.filename);
	initLexer(&lexer, source);

	//run the lexer until the end of the source
	do {
		token = scanLexer(&lexer);
	} while(token.type != TOKEN_EOF);

	//BUGFIX: readFile hands over ownership of the buffer; the original leaked it
	free(source);
}
//entry point
int main(int argc, const char* argv[]) {
initCommand(argc, argv);
//command specific actions
if (command.error) {
usageCommand(argc, argv);
return 0;
}
if (command.help) {
helpCommand(argc, argv);
return 0;
}
if (command.version) {
copyrightCommand(argc, argv);
return 0;
}
//print this until the interpreter meets the specification
if (command.verbose) {
printf("Warning! This interpreter is a work in progress, it does not yet meet the %d.%d.%d specification.\n", TOY_VERSION_MAJOR, TOY_VERSION_MINOR, TOY_VERSION_PATCH);
}
if (command.filename) {
debug();
// runFile(command.filename);
return 0;
}
if (command.source) {
// runString(command.source);
// Lexer lexer;
// initLexer(&lexer, command.source);
// //debugging
// while(true) {
// Token token = scanLexer(&lexer);
// if (token.type == TOKEN_EOF) {
// break;
// }
// }
return 0;
}
// repl();
return 0;
}

91
source/token_types.h Normal file
View File

@@ -0,0 +1,91 @@
#pragma once
typedef enum TokenType {
//types
TOKEN_NULL,
TOKEN_BOOLEAN,
TOKEN_INTEGER,
TOKEN_FLOAT,
TOKEN_STRING,
TOKEN_ARRAY,
TOKEN_DICTIONARY,
TOKEN_FUNCTION,
TOKEN_ANY,
//keywords and reserved words
TOKEN_AS,
TOKEN_ASSERT,
TOKEN_BREAK,
TOKEN_CLASS,
TOKEN_CONST,
TOKEN_CONTINUE,
TOKEN_DO,
TOKEN_ELSE,
TOKEN_EXPORT,
TOKEN_FOR,
TOKEN_FOREACH,
TOKEN_IF,
TOKEN_IMPORT,
TOKEN_IN,
TOKEN_OF,
TOKEN_PRINT,
TOKEN_RETURN,
TOKEN_USING,
TOKEN_VAR,
TOKEN_WHILE,
//literal values
TOKEN_IDENTIFIER,
TOKEN_LITERAL_NULL,
TOKEN_LITERAL_TRUE,
TOKEN_LITERAL_FALSE,
TOKEN_LITERAL_INTEGER,
TOKEN_LITERAL_FLOAT,
TOKEN_LITERAL_STRING,
//math operators
TOKEN_PLUS,
TOKEN_MINUS,
TOKEN_MULTIPLY,
TOKEN_DIVIDE,
TOKEN_MODULO,
TOKEN_PLUS_ASSIGN,
TOKEN_MINUS_ASSIGN,
TOKEN_MULTIPLY_ASSIGN,
TOKEN_DIVIDE_ASSIGN,
TOKEN_MODULO_ASSIGN,
TOKEN_PLUS_PLUS,
TOKEN_MINUS_MINUS,
//logical operators
TOKEN_PAREN_LEFT,
TOKEN_PAREN_RIGHT,
TOKEN_BRACKET_LEFT,
TOKEN_BRACKET_RIGHT,
TOKEN_BRACE_LEFT,
TOKEN_BRACE_RIGHT,
TOKEN_NOT,
TOKEN_NOT_EQUAL,
TOKEN_EQUAL,
TOKEN_LESS,
TOKEN_GREATER,
TOKEN_LESS_EQUAL,
TOKEN_GREATER_EQUAL,
TOKEN_AND,
TOKEN_OR,
//other operators
TOKEN_ASSIGN,
TOKEN_COLON,
TOKEN_SEMICOLON,
TOKEN_COMMA,
TOKEN_DOT,
TOKEN_PIPE,
TOKEN_REST,
//meta tokens
TOKEN_PASS,
TOKEN_ERROR,
TOKEN_EOF,
} TokenType;