diff --git a/README.md b/README.md index ed7d899..2507af4 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,10 @@ This repository holds the reference implementation for Toy, written in C. * Re-direct output, error and assertion failure messages * Open source under the zlib license +## Syntax + +*Omitted for review.* + ## Building *Omitted for review.* @@ -26,10 +30,6 @@ This repository holds the reference implementation for Toy, written in C. *Omitted for review.* -## Syntax - -*Omitted for review.* - # License This source code is covered by the zlib license (see [LICENSE](LICENSE)). diff --git a/lib/.gitkeep b/lib/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/repl/.gitkeep b/repl/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/source/toy_keywords.c b/source/toy_keywords.c new file mode 100644 index 0000000..81b49ac --- /dev/null +++ b/source/toy_keywords.c @@ -0,0 +1,75 @@ +#include "toy_keywords.h" + +#include + +const Toy_KeywordTypeTuple Toy_private_keywords[] = { + //null + {TOY_TOKEN_NULL, "null"}, + + //types + {TOY_TOKEN_TYPE_TYPE, "type"}, + {TOY_TOKEN_TYPE_BOOLEAN, "bool"}, + {TOY_TOKEN_TYPE_INTEGER, "int"}, + {TOY_TOKEN_TYPE_FLOAT, "float"}, + {TOY_TOKEN_TYPE_STRING, "string"}, + // TOY_TOKEN_TYPE_ARRAY, + // TOY_TOKEN_TYPE_DICTIONARY, + {TOY_TOKEN_TYPE_FUNCTION, "fn"}, + {TOY_TOKEN_TYPE_OPAQUE, "opaque"}, + {TOY_TOKEN_TYPE_ANY, "any"}, + + //keywords and reserved words + {TOY_TOKEN_KEYWORD_AS, "as"}, + {TOY_TOKEN_KEYWORD_ASSERT, "assert"}, + {TOY_TOKEN_KEYWORD_BREAK, "break"}, + {TOY_TOKEN_KEYWORD_CLASS, "class"}, + {TOY_TOKEN_KEYWORD_CONST, "const"}, //TODO: investigate the constness of types + {TOY_TOKEN_KEYWORD_CONTINUE, "continue"}, + {TOY_TOKEN_KEYWORD_DO, "do"}, + {TOY_TOKEN_KEYWORD_ELSE, "else"}, + {TOY_TOKEN_KEYWORD_EXPORT, "export"}, + {TOY_TOKEN_KEYWORD_FOR, "for"}, + {TOY_TOKEN_KEYWORD_FOREACH, "foreach"}, + {TOY_TOKEN_KEYWORD_IF, "if"}, + {TOY_TOKEN_KEYWORD_IMPORT, "import"}, + {TOY_TOKEN_KEYWORD_IN, "in"}, + {TOY_TOKEN_KEYWORD_OF, "of"}, + {TOY_TOKEN_KEYWORD_PRINT, "print"}, + {TOY_TOKEN_KEYWORD_RETURN, "return"}, + {TOY_TOKEN_KEYWORD_TYPEAS, "typeas"}, + {TOY_TOKEN_KEYWORD_TYPEOF, "typeof"}, + {TOY_TOKEN_KEYWORD_VAR, "var"}, + {TOY_TOKEN_KEYWORD_WHILE, "while"}, + + //literal values + {TOY_TOKEN_LITERAL_TRUE, "true"}, + {TOY_TOKEN_LITERAL_FALSE, "false"}, + + {TOY_TOKEN_EOF, NULL}, +}; + +const char* Toy_private_findKeywordByType(const Toy_TokenType type) { + if (type == TOY_TOKEN_EOF) { + return "EOF"; + } + + for(int i = 0; Toy_private_keywords[i].keyword; i++) { + if (Toy_private_keywords[i].type == type) { + return Toy_private_keywords[i].keyword; + } + } + + return NULL; +} + +Toy_TokenType Toy_private_findTypeByKeyword(const char* keyword) { + const int length = strlen(keyword); + + for (int i = 0; Toy_private_keywords[i].keyword; i++) { + if (!strncmp(keyword, Toy_private_keywords[i].keyword, length)) { + return Toy_private_keywords[i].type; + } + } + + return TOY_TOKEN_EOF; +} diff --git a/source/toy_keywords.h b/source/toy_keywords.h new file mode 100644 index 0000000..e8fba5e --- /dev/null +++ b/source/toy_keywords.h @@ -0,0 +1,15 @@ +#pragma once + +#include "toy_token_types.h" +#include "toy_common.h" + +typedef struct { + const Toy_TokenType type; + const char* keyword; +} Toy_KeywordTypeTuple; + +extern const Toy_KeywordTypeTuple Toy_private_keywords[]; + +//access +const char* Toy_private_findKeywordByType(const Toy_TokenType type); +Toy_TokenType Toy_private_findTypeByKeyword(const char* keyword); diff --git a/source/toy_lexer.h b/source/toy_lexer.h new file mode 100644 index 0000000..8de1dfa --- /dev/null +++ b/source/toy_lexer.h @@ -0,0 +1,25 @@ +#pragma once + +#include "toy_token_types.h" +#include "toy_common.h" + +//lexers are bound to a string of code +typedef struct { + int start; //start of the current token + int current; //current position of the lexer + int line; //track this for error handling + const char* source; +} Toy_Lexer; + +//tokens are intermediaries between lexers and parsers +typedef struct { + Toy_TokenType type; + int length; + int line; + const char* lexeme; +} Toy_Token; + +TOY_API void Toy_bindLexer(Toy_Lexer* lexer, const char* source); +TOY_API Toy_Token Toy_private_scanLexer(Toy_Lexer* lexer); +TOY_API void Toy_private_printToken(Toy_Token* token); + diff --git a/source/toy_token_types.h b/source/toy_token_types.h new file mode 100644 index 0000000..299a14d --- /dev/null +++ b/source/toy_token_types.h @@ -0,0 +1,104 @@ +#pragma once + +//the types of tokens produced by the lexer +typedef enum Toy_TokenType { + //with apologies to Tony Hoare + TOY_TOKEN_NULL, + + //variable names + TOY_TOKEN_IDENTIFIER, + + //types + TOY_TOKEN_TYPE_TYPE, + TOY_TOKEN_TYPE_BOOLEAN, + TOY_TOKEN_TYPE_INTEGER, + TOY_TOKEN_TYPE_FLOAT, + TOY_TOKEN_TYPE_STRING, + TOY_TOKEN_TYPE_ARRAY, + TOY_TOKEN_TYPE_DICTIONARY, + TOY_TOKEN_TYPE_FUNCTION, + TOY_TOKEN_TYPE_OPAQUE, + TOY_TOKEN_TYPE_ANY, + + //keywords and reserved words + TOY_TOKEN_KEYWORD_AS, + TOY_TOKEN_KEYWORD_ASSERT, + TOY_TOKEN_KEYWORD_BREAK, + TOY_TOKEN_KEYWORD_CLASS, + TOY_TOKEN_KEYWORD_CONST, + TOY_TOKEN_KEYWORD_CONTINUE, + TOY_TOKEN_KEYWORD_DO, + TOY_TOKEN_KEYWORD_ELSE, + TOY_TOKEN_KEYWORD_EXPORT, + TOY_TOKEN_KEYWORD_FOR, + TOY_TOKEN_KEYWORD_FOREACH, + TOY_TOKEN_KEYWORD_IF, + TOY_TOKEN_KEYWORD_IMPORT, + TOY_TOKEN_KEYWORD_IN, + TOY_TOKEN_KEYWORD_OF, + TOY_TOKEN_KEYWORD_PRINT, + TOY_TOKEN_KEYWORD_RETURN, + TOY_TOKEN_KEYWORD_TYPEAS, + TOY_TOKEN_KEYWORD_TYPEOF, + TOY_TOKEN_KEYWORD_VAR, + TOY_TOKEN_KEYWORD_WHILE, + + //literal values + TOY_TOKEN_LITERAL_TRUE, + TOY_TOKEN_LITERAL_FALSE, + TOY_TOKEN_LITERAL_INTEGER, + TOY_TOKEN_LITERAL_FLOAT, + TOY_TOKEN_LITERAL_STRING, + + //math operators + TOY_TOKEN_OPERATOR_PLUS, + TOY_TOKEN_OPERATOR_MINUS, + TOY_TOKEN_OPERATOR_MULTIPLY, + TOY_TOKEN_OPERATOR_DIVIDE, + TOY_TOKEN_OPERATOR_MODULO, + TOY_TOKEN_OPERATOR_PLUS_ASSIGN, + TOY_TOKEN_OPERATOR_MINUS_ASSIGN, + TOY_TOKEN_OPERATOR_MULTIPLY_ASSIGN, + TOY_TOKEN_OPERATOR_DIVIDE_ASSIGN, + TOY_TOKEN_OPERATOR_MODULO_ASSIGN, + TOY_TOKEN_OPERATOR_INCREMENT, + TOY_TOKEN_OPERATOR_DECREMENT, + TOY_TOKEN_OPERATOR_ASSIGN, + + //logical operators + TOY_TOKEN_OPERATOR_COMPARE, + TOY_TOKEN_OPERATOR_NOT_COMPARE, + TOY_TOKEN_OPERATOR_LESS, + TOY_TOKEN_OPERATOR_GREATER, + TOY_TOKEN_OPERATOR_LESS_COMPARE, + TOY_TOKEN_OPERATOR_GREATER_COMPARE, + TOY_TOKEN_OPERATOR_AND, + TOY_TOKEN_OPERATOR_OR, + TOY_TOKEN_OPERATOR_NOT, + + //control operators + TOY_TOKEN_OPERATOR_PAREN_LEFT, + TOY_TOKEN_OPERATOR_PAREN_RIGHT, + TOY_TOKEN_OPERATOR_BRACKET_LEFT, + TOY_TOKEN_OPERATOR_BRACKET_RIGHT, + TOY_TOKEN_OPERATOR_BRACE_LEFT, + TOY_TOKEN_OPERATOR_BRACE_RIGHT, + TOY_TOKEN_OPERATOR_SEMICOLON, + TOY_TOKEN_OPERATOR_COMMA, + + //other operators (context sensitive) + TOY_TOKEN_OPERATOR_QUESTION, + TOY_TOKEN_OPERATOR_COLON, + + TOY_TOKEN_OPERATOR_DOT, + TOY_TOKEN_OPERATOR_CONCAT, + TOY_TOKEN_OPERATOR_REST, + + //unused operators + TOY_TOKEN_OPERATOR_PIPE, + + //meta tokens + TOY_TOKEN_PASS, + TOY_TOKEN_ERROR, + TOY_TOKEN_EOF, +} Toy_TokenType; diff --git a/tests/.gitkeep b/tests/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tools/.gitkeep b/tools/.gitkeep new file mode 100644 index 0000000..e69de29