diff --git a/scripts/small.toy b/scripts/small.toy index 9b6cede..9b3cd5b 100644 --- a/scripts/small.toy +++ b/scripts/small.toy @@ -1,21 +1,3 @@ -//test basic truth ternaries -{ - assert true ? true : false, "Basic true ternary failed"; - assert false ? false : true, "Basic false ternary failed"; -} -//test nesting -{ - fn least(a, b, c) { - return a < b ? a : b < c ? b : c; - } - - assert least(1, 2, 3) == 1, "Least 1, 2, 3 failed"; - assert least(10, 5, 7) == 5, "Least 10, 5, 7 failed"; - assert least(9, 7, 5) == 5, "Least 9, 7, 5 failed"; -} - - -print "All good"; - +print " foo \n bar"; \ No newline at end of file diff --git a/source/toy_lexer.c b/source/toy_lexer.c index ce60819..a7bd4ac 100644 --- a/source/toy_lexer.c +++ b/source/toy_lexer.c @@ -171,12 +171,24 @@ static Toy_Token makeIntegerOrFloat(Toy_Lexer* lexer) { } static Toy_Token makeString(Toy_Lexer* lexer, char terminator) { - while (!isAtEnd(lexer) && peek(lexer) != terminator) { + while (!isAtEnd(lexer)) { + //skip escaped terminators + if (peek(lexer) == '\\' && peekNext(lexer) == terminator) { + advance(lexer); + advance(lexer); + continue; + } + + //actually escape if you've hit the terminator + if (peek(lexer) == terminator) { + advance(lexer); //eat terminator + break; + } + + //otherwise advance(lexer); } - advance(lexer); //eat terminator - if (isAtEnd(lexer)) { return makeErrorToken(lexer, "Unterminated string"); } diff --git a/source/toy_parser.c b/source/toy_parser.c index cb01ef0..81f9706 100644 --- a/source/toy_parser.c +++ b/source/toy_parser.c @@ -261,17 +261,51 @@ static Toy_Opcode string(Toy_Parser* parser, Toy_ASTNode** nodeHandle) { //handle strings switch(parser->previous.type) { case TOY_TOKEN_LITERAL_STRING: { - int length = parser->previous.length; + //unescape valid escaped characters + int strLength = 0; + char* buffer = TOY_ALLOCATE(char, parser->previous.length); - //for safety - if (length > TOY_MAX_STRING_LENGTH) { - length = TOY_MAX_STRING_LENGTH; - char buffer[256]; - snprintf(buffer, 256, TOY_CC_ERROR "Strings can only be a maximum of %d characters long" TOY_CC_RESET, TOY_MAX_STRING_LENGTH); - error(parser, parser->previous, buffer); + for (int i = 0; i < parser->previous.length; i++) { + if (parser->previous.lexeme[i] != '\\') { //copy normally + buffer[strLength++] = parser->previous.lexeme[i]; + continue; + } + + //unescape based on the character + switch(parser->previous.lexeme[++i]) { + case 'r': + buffer[strLength++] = '\r'; + break; + case 'n': + buffer[strLength++] = '\n'; + break; + case 't': + buffer[strLength++] = '\t'; + break; + case '\\': + buffer[strLength++] = '\\'; + break; + case '"': + buffer[strLength++] = '"'; + break; + default: { + char msg[256]; + snprintf(msg, 256, TOY_CC_ERROR "Unrecognized escape character %c in string" TOY_CC_RESET, parser->previous.lexeme[++i]); + error(parser, parser->previous, msg); + } + } } - Toy_Literal literal = TOY_TO_STRING_LITERAL(Toy_createRefStringLength(parser->previous.lexeme, length)); + //for length safety + if (strLength > TOY_MAX_STRING_LENGTH) { + strLength = TOY_MAX_STRING_LENGTH; + char msg[256]; + snprintf(msg, 256, TOY_CC_ERROR "Strings can only be a maximum of %d characters long" TOY_CC_RESET, TOY_MAX_STRING_LENGTH); + error(parser, parser->previous, msg); + } + + Toy_Literal literal = TOY_TO_STRING_LITERAL(Toy_createRefStringLength(buffer, strLength)); + TOY_FREE_ARRAY(char, buffer, parser->previous.length); Toy_emitASTNodeLiteral(nodeHandle, literal); Toy_freeLiteral(literal); return TOY_OP_EOF; diff --git a/test/test_parser.c b/test/test_parser.c index 65b518c..99882d5 100644 --- a/test/test_parser.c +++ b/test/test_parser.c @@ -36,17 +36,17 @@ int main() { //inspect the node if (node == NULL) { - fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode is null" TOY_CC_RESET); + fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode is null\n" TOY_CC_RESET); return -1; } if (node->type != TOY_AST_NODE_UNARY || node->unary.opcode != TOY_OP_PRINT) { - fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode is not a unary print instruction" TOY_CC_RESET); + fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode is not a unary print instruction\n" TOY_CC_RESET); return -1; } if (node->unary.child->type != TOY_AST_NODE_LITERAL || !TOY_IS_NULL(node->unary.child->atomic.literal)) { - fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode to be printed is not a null literal" TOY_CC_RESET); + fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode to be printed is not a null literal\n" TOY_CC_RESET); return -1; } @@ -70,7 +70,7 @@ int main() { while (node != NULL) { if (node->type == TOY_AST_NODE_ERROR) { - fprintf(stderr, TOY_CC_ERROR "ERROR: Error node detected" TOY_CC_RESET); + fprintf(stderr, TOY_CC_ERROR "ERROR: Error node detected\n" TOY_CC_RESET); return -1; } @@ -83,6 +83,44 @@ int main() { free((void*)source); } + { + //test parsing of escaped characters + char* source = "print \"\\\"\";"; //NOTE: this string goes through two layers of escaping + + //test parsing + Toy_Lexer lexer; + Toy_Parser parser; + Toy_initLexer(&lexer, source); + Toy_initParser(&parser, &lexer); + + Toy_ASTNode* node = Toy_scanParser(&parser); + + //inspect the node + if (node == NULL) { + fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode is null\n" TOY_CC_RESET); + return -1; + } + + if (node->type != TOY_AST_NODE_UNARY || node->unary.opcode != TOY_OP_PRINT) { + fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode is not a unary print instruction\n" TOY_CC_RESET); + return -1; + } + + if (node->unary.child->type != TOY_AST_NODE_LITERAL || !TOY_IS_STRING(node->unary.child->atomic.literal)) { + fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode to be printed is not a string literal\n" TOY_CC_RESET); + return -1; + } + + if (!Toy_equalsRefStringCString(TOY_AS_STRING(node->unary.child->atomic.literal), "\"")) { + fprintf(stderr, TOY_CC_ERROR "ERROR: ASTNode to be printed is not an escaped character, found: %s\n" TOY_CC_RESET, Toy_toCString(TOY_AS_STRING(node->unary.child->atomic.literal))); + return -1; + } + + //cleanup + Toy_freeASTNode(node); + Toy_freeParser(&parser); + } + printf(TOY_CC_NOTICE "All good\n" TOY_CC_RESET); return 0; }