diff --git a/source/toy_lexer.c b/source/toy_lexer.c
index a7bd4ac..c77df7c 100644
--- a/source/toy_lexer.c
+++ b/source/toy_lexer.c
@@ -141,12 +141,12 @@ static Toy_Token makeToken(Toy_Lexer* lexer, Toy_TokenType type) {
 static Toy_Token makeIntegerOrFloat(Toy_Lexer* lexer) {
 	Toy_TokenType type = TOY_TOKEN_LITERAL_INTEGER; //what am I making?
 
-	while(isDigit(lexer)) advance(lexer);
+	while(isDigit(lexer) || peek(lexer) == '_') advance(lexer);
 
 	if (peek(lexer) == '.' && (peekNext(lexer) >= '0' && peekNext(lexer) <= '9')) { //BUGFIX: peekNext == digit
 		type = TOY_TOKEN_LITERAL_FLOAT;
 		advance(lexer);
-		while(isDigit(lexer)) advance(lexer);
+		while(isDigit(lexer) || peek(lexer) == '_') advance(lexer);
 	}
 
 	Toy_Token token;
diff --git a/source/toy_parser.c b/source/toy_parser.c
index a559da1..94f2bf8 100644
--- a/source/toy_parser.c
+++ b/source/toy_parser.c
@@ -508,6 +508,24 @@ static Toy_Opcode unary(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
 	return TOY_OP_EOF;
 }
 
+//returns a newly allocated, null-terminated copy of the first `length` chars of `lexeme`, skipping every `c`
+//the buffer is always `length + 1` chars, so callers release it with TOY_FREE_ARRAY(char, result, length + 1)
+static char* removeChar(const char* lexeme, int length, char c) {
+	int resPos = 0;
+	char* result = TOY_ALLOCATE(char, length + 1);
+
+	for (int i = 0; i < length; i++) {
+		if (lexeme[i] == c) {
+			continue;
+		}
+
+		result[resPos++] = lexeme[i];
+	}
+
+	result[resPos] = '\0';
+	return result;
+}
+
 static Toy_Opcode atomic(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
 	switch(parser->previous.type) {
 		case TOY_TOKEN_NULL:
@@ -524,14 +542,18 @@ static Toy_Opcode atomic(Toy_Parser* parser, Toy_ASTNode** nodeHandle) {
 
 		case TOY_TOKEN_LITERAL_INTEGER: {
 			int value = 0;
-			sscanf(parser->previous.lexeme, "%d", &value);
+			char* lexeme = removeChar(parser->previous.lexeme, parser->previous.length, '_');
+			sscanf(lexeme, "%d", &value);
+			TOY_FREE_ARRAY(char, lexeme, parser->previous.length + 1);
 			Toy_emitASTNodeLiteral(nodeHandle, TOY_TO_INTEGER_LITERAL(value));
 			return TOY_OP_EOF;
 		}
 
 		case TOY_TOKEN_LITERAL_FLOAT: {
 			float value = 0;
-			sscanf(parser->previous.lexeme, "%f", &value);
+			char* lexeme = removeChar(parser->previous.lexeme, parser->previous.length, '_');
+			sscanf(lexeme, "%f", &value);
+			TOY_FREE_ARRAY(char, lexeme, parser->previous.length + 1);
 			Toy_emitASTNodeLiteral(nodeHandle, TOY_TO_FLOAT_LITERAL(value));
 			return TOY_OP_EOF;
 		}
diff --git a/test/test_parser.c b/test/test_parser.c
index 99882d5..de3cdd3 100644
--- a/test/test_parser.c
+++ b/test/test_parser.c
@@ -121,6 +121,44 @@ int main() {
 		Toy_freeParser(&parser);
 	}
 
+	{
+		//test parsing of underscored numbers
+		char* source = "print 1_000_000;";
+
+		//test parsing
+		Toy_Lexer lexer;
+		Toy_Parser parser;
+		Toy_initLexer(&lexer, source);
+		Toy_initParser(&parser, &lexer);
+
+		Toy_ASTNode* node = Toy_scanParser(&parser);
+
+		//inspect the node
+		if (node == NULL) {
+			fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode is null\n" TOY_CC_RESET);
+			return -1;
+		}
+
+		if (node->type != TOY_AST_NODE_UNARY || node->unary.opcode != TOY_OP_PRINT) {
+			fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode is not a unary print instruction\n" TOY_CC_RESET);
+			return -1;
+		}
+
+		if (node->unary.child->type != TOY_AST_NODE_LITERAL || !TOY_IS_INTEGER(node->unary.child->atomic.literal)) {
+			fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode to be printed is not an integer literal\n" TOY_CC_RESET);
+			return -1;
+		}
+
+		if (TOY_AS_INTEGER(node->unary.child->atomic.literal) != 1000000) {
+			fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode to be printed is not the correct value, found: %d\n" TOY_CC_RESET, TOY_AS_INTEGER(node->unary.child->atomic.literal));
+			return -1;
+		}
+
+		//cleanup
+		Toy_freeASTNode(node);
+		Toy_freeParser(&parser);
+	}
+
 	printf(TOY_CC_NOTICE "All good\n" TOY_CC_RESET);
 	return 0;
 }