Added proper lexing; the whole thing is now a look-back parser

2020-09-07 00:20:21 +10:00
parent 5183f89c91
commit 42e4990600
3 changed files with 111 additions and 56 deletions
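
The "look-back" style referred to above advances the cursor first (tokens[pos++]) and then reads the token just consumed as tokens[pos - 1], instead of peeking at tokens[pos] and incrementing afterwards. A minimal sketch of the idiom in isolation, not part of this commit (demoLookBack is a made-up name):

//a look-back loop: advance unconditionally, then inspect the token one behind the cursor
const demoLookBack = (tokens) => {
	let pos = 0;
	const seen = [];
	while (tokens[pos++]) {
		seen.push(tokens[pos - 1]); //the "current" token always sits at pos - 1
	}
	return seen;
};
//demoLookBack(['type', 'Book', '{', '}']) returns a new array with the same tokens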

View File

@@ -21,7 +21,7 @@ let authors = [
},
{
name: 'KayneRuse',
name: 'Kayne Ruse',
books: [
{ title: 'alpha', published: "1" },
{ title: 'beta', published: "2" },

View File

@@ -26,11 +26,11 @@ const handler = {
//if this query has a matched scalar, filter by that match
books = books.filter(b => {
return scalars.every(s => {
return !s.match || b[s.name] === s.match; //other filter methods, such as ranges of numbers, can also be implemented
return !s.match || b[s.name].toUpperCase() === s.match.toUpperCase(); //other filter methods, such as ranges of numbers, can also be implemented
});
});
//return all books after filtering
//return all book fields after filtering
const fields = scalars.map(s => s.name);
return books.map(b => {
const ret = {};
@@ -59,11 +59,11 @@ const handler = {
//if this query has a matched scalar, filter by that match
authors = authors.filter(a => {
return scalars.every(s => {
return !s.match || a[s.name] === s.match; //other filter methods, such as ranges of numbers, can also be implemented
return !s.match || a[s.name].toUpperCase() === s.match.toUpperCase(); //other filter methods, such as ranges of numbers, can also be implemented
});
});
//return all authors
//return all author fields after filtering
const fields = scalars.map(s => s.name);
return authors.map(a => {
const ret = {};
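
The filter change above makes scalar matching case-insensitive on both sides of the comparison. A small standalone sketch of the same check, with made-up sample data:

const sampleBooks = [{ title: 'alpha' }, { title: 'Beta' }];
const sampleScalars = [{ name: 'title', match: 'ALPHA' }];
const matched = sampleBooks.filter(b =>
	sampleScalars.every(s => !s.match || b[s.name].toUpperCase() === s.match.toUpperCase())
);
//matched is [{ title: 'alpha' }] - the comparison no longer depends on letter case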

View File

@@ -19,7 +19,7 @@ const main = (schema, handler) => {
return async reqBody => {
try {
//parse the query
const tokens = reqBody.split(/(\s+)/).filter(s => s.trim().length > 0); //TODO: proper token parsing
const tokens = lexify(reqBody, true);
let pos = 0;
//check for keywords
@@ -33,7 +33,12 @@ const main = (schema, handler) => {
//no leading keyword - regular query
default:
const result = await parseQuery(handler, tokens, pos, typeGraph);
const [result, endPos] = await parseQuery(handler, tokens, pos, typeGraph);
//reject the request, despite finishing processing it
if (tokens[endPos]) {
throw 'Unexpected data found at the end of the token list (found ' + tokens[endPos] + ')';
}
return [200, result];
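
Because parseQuery now reports how far it read, the caller can reject any tokens left over after the query body. A tiny standalone sketch of that check (rejectTrailing and the sample token lists are illustrative, not from this repo):

const rejectTrailing = (tokens, endPos) => {
	if (tokens[endPos]) {
		throw 'Unexpected data found at the end of the token list (found ' + tokens[endPos] + ')';
	}
};
//rejectTrailing(['Book', '{', 'title', '}'], 4) passes quietly
//rejectTrailing(['Book', '{', 'title', '}', 'garbage'], 4) throws
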
@@ -58,30 +63,30 @@ const buildTypeGraph = schema => {
};
//parse the schema
const tokens = schema.split(/(\s+)/).filter(s => s.trim().length > 0); //TODO: proper token parsing
const tokens = lexify(schema, false);
let pos = 0;
while (tokens[pos]) {
while (tokens[pos++]) {
//check for keywords
switch(tokens[pos++]) {
switch(tokens[pos - 1]) {
case 'type':
graph[tokens[pos]] = parseCompoundType(tokens, pos);
graph[tokens[pos++]] = parseCompoundType(tokens, pos);
//advance to the end of the compound type
pos = eatBlock(tokens, pos + 2); //+2: skip the name & opening bracket
pos = eatBlock(tokens, pos);
break;
case 'scalar':
if (keywords.includes(graph[tokens[pos]])) {
throw 'Unexpected keyword ' + graph[tokens[pos]];
if (keywords.includes(graph[tokens[pos - 1]])) {
throw 'Unexpected keyword ' + graph[tokens[pos - 1]];
}
graph[tokens[pos++]] = { scalar: true };
break;
default:
throw 'Unknown token ' + tokens[pos -1];
throw 'Unknown token ' + tokens[pos - 1];
}
}
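
For reference, a schema such as "scalar String type Book { String title }" should now produce a type graph roughly like the sketch below; the per-field shape (typeName, array) is only partly visible in this diff and is inferred from the code that reads it, so treat it as an assumption:

//rough shape of the graph built by buildTypeGraph (field entry shape assumed)
const exampleGraph = {
	String: { scalar: true },                               //from the 'scalar' branch
	Book: { title: { typeName: 'String', array: false } },  //from the 'type' branch via parseCompoundType
};
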
@@ -89,10 +94,8 @@ const buildTypeGraph = schema => {
};
const parseCompoundType = (tokens, pos) => {
pos++; //eat the compound name
//format check (not strictly necessary, but it looks nice)
if (tokens[pos++] != '{') {
if (tokens[pos] !== '{') {
throw 'Expected \'{\' in compound type definition';
}
@@ -100,9 +103,9 @@ const parseCompoundType = (tokens, pos) => {
const compound = {};
//for each line of the compound type
while (tokens[pos] && tokens[pos] != '}') {
while (tokens[pos++] && tokens[pos] !== '}') {
let type = tokens[pos++];
const name = tokens[pos++];
const name = tokens[pos];
//parse the extra typing data
let array = false;
@@ -151,19 +154,19 @@ const parseQuery = async (handler, tokens, pos, typeGraph, parent = null) => {
//determine this type
let queryType;
if (typeGraph[tokens[pos]] && typeGraph[tokens[pos]].scalar) {
queryType = tokens[pos];
}
else if (parent && typeGraph[parent.typeName][tokens[pos]]) {
queryType = typeGraph[parent.typeName][tokens[pos]].typeName;
} else {
queryType = tokens[pos];
}
//move on
//only read past tokens
pos++;
if (typeGraph[tokens[pos - 1]] && typeGraph[tokens[pos - 1]].scalar) {
queryType = tokens[pos - 1];
}
else if (parent && typeGraph[parent.typeName][tokens[pos - 1]]) {
queryType = typeGraph[parent.typeName][tokens[pos - 1]].typeName;
} else {
queryType = tokens[pos - 1];
}
if (tokens[pos++] != '{') {
throw 'Expected \'{\' after queried type';
}
@@ -172,59 +175,62 @@ const parseQuery = async (handler, tokens, pos, typeGraph, parent = null) => {
const scalarFields = [];
const deferredCalls = []; //functions (promises) that will be called at the end of this function
while(tokens[pos] && tokens[pos] != '}') { //while not at the end of this block
while(tokens[pos++] && tokens[pos - 1] !== '}') { //while not at the end of this block
let match = false;
if (tokens[pos] === 'match') {
if (tokens[pos - 1] === 'match') {
match = true;
pos++;
}
//prevent using keywords
if (keywords.includes(tokens[pos])) {
throw 'Unexpected keyword ' + tokens[pos];
if (keywords.includes(tokens[pos - 1])) {
throw 'Unexpected keyword ' + tokens[pos - 1];
}
//type is a scalar, and can be queried
if (typeGraph[queryType] && typeGraph[queryType][tokens[pos]] && typeGraph[typeGraph[queryType][tokens[pos]].typeName].scalar) {
if (typeGraph[queryType] && typeGraph[queryType][tokens[pos - 1]] && typeGraph[typeGraph[queryType][tokens[pos - 1]].typeName].scalar) {
//push the scalar object to the queryFields
scalarFields.push({ typeName: typeGraph[queryType][tokens[pos]].typeName, name: tokens[pos], match: match ? tokens[++pos] : null });
scalarFields.push({ typeName: typeGraph[queryType][tokens[pos - 1]].typeName, name: tokens[pos - 1], match: match ? tokens[pos++] : null });
//if I am a scalar child of a match amd I do not match
if (parent && parent.match && !match) {
throw 'Broken match chain in scalar type ' + tokens[pos];
throw 'Broken match chain in scalar type ' + tokens[pos - 1];
}
pos++;
}
else if (typeGraph[queryType] && typeGraph[queryType][tokens[pos]] && !typeGraph[typeGraph[queryType][tokens[pos]].typeName].scalar) {
else if (typeGraph[queryType] && typeGraph[queryType][tokens[pos - 1]] && !typeGraph[typeGraph[queryType][tokens[pos - 1]].typeName].scalar) {
const pos2 = pos; //cache the value to keep it from changing
//recurse
deferredCalls.push(async (result) => {
//if I am a compound child of a match amd I do not match
if (parent && parent.match && !match) {
throw 'Broken match chain in compound type ' + tokens[pos2];
throw 'Broken match chain in compound type ' + tokens[pos2 - 1];
}
return [tokens[pos2], await parseQuery(
const [queryResult, dummyPos] = await parseQuery(
handler,
tokens,
pos2,
pos2 - 1,
typeGraph,
{ typeName: queryType, scalars: scalarFields, context: result, match: match } //parent object (this one)
), match]; //HACK: match piggybacking on the tuple
);
return [tokens[pos2], queryResult, match]; //HACK: match piggybacking on the tuple
});
pos = eatBlock(tokens, pos + 2);
} else {
//token is something else?
throw 'Found something not in the type graph: ' + tokens[pos] + " " + pos;
throw 'Found something not in the type graph: ' + tokens[pos - 1] + " " + (pos - 1);
}
}
//eat the end bracket
pos++;
if (tokens[pos - 1] !== '}') {
throw 'Expected \'}\' at the end of query (found ' + tokens[pos] + ')';
}
if (!handler[queryType]) {
throw 'Unrecognized type ' + queryType;
@@ -237,7 +243,6 @@ const parseQuery = async (handler, tokens, pos, typeGraph, parent = null) => {
const tuples = await Promise.all(deferredCalls.map(async call => await call(res)));
if (!tuples.every(tuple => !tuple[2] || tuple[1].length > 0)) {
console.log('discarding', tuples);
return [];
}
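
Each deferred call resolves to a [fieldName, childResults, match] tuple (the "match piggybacking" HACK above), and a result is discarded whenever a matched child came back empty. A small illustration with made-up tuples:

const exampleTuples = [
	['books', [{ title: 'alpha' }], true], //matched child with results - fine
	['books', [], true],                   //matched child with no results - discard
];
const keep = exampleTuples.every(t => !t[2] || t[1].length > 0);
//keep === false here, so this branch would log 'discarding' and return []
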
@@ -247,7 +252,7 @@ const parseQuery = async (handler, tokens, pos, typeGraph, parent = null) => {
}));
results = results.filter(r => Array.isArray(r) && r.length == 0 ? false : true);
return results;
return [results, pos];
};
//utils
@@ -258,15 +263,65 @@ const checkAlphaNumeric = (str) => {
};
const eatBlock = (tokens, pos) => {
while (tokens[pos] && tokens[pos] != '}') {
while (tokens[pos++] && tokens[pos - 1] !== '}') {
if (tokens[pos] == '{') {
pos = eatBlock(tokens, pos+1);
} else {
pos++;
pos = eatBlock(tokens, pos);
}
}
return ++pos; //eat the final '}'
if (tokens[pos - 1] !== '}') { //eat the final '}'
throw 'Expected \'}\' while eating block (found ' + tokens[pos - 1] + ')';
}
return pos;
};
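
With the look-back rewrite, eatBlock is expected to return the position one past the matching '}', including across nested blocks. A quick check, assuming the eatBlock above is in scope and using a hand-written token list:

const toks = ['{', 'title', 'author', '{', 'name', '}', '}', 'next'];
console.log(toks[eatBlock(toks, 0)]); //'next' - one token past the outer closing brace
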
const lexify = (body, allowStrings) => {
let current = 0;
tokens = [];
while(body[current++]) {
switch(body[current - 1]) {
case '{':
case '}':
//push just this symbol
tokens.push(body.substring(current - 1, current));
break;
case '"': {
if (!allowStrings) {
throw 'Can\'t lex strings';
}
const start = current;
while (body[current++] !== '"') { //find the terminating '
if (!body[current - 1]) {
throw 'Unterminated string';
}
}
tokens.push(body.substring(start, current - 1));
break;
}
default: {
//ignore whitespace
if (/\s/.test(body[current - 1])) {
break;
}
//anything else is a multi-character token
const start = current;
while(body[current - 1] && !/[{}"\s]/.test(body[current - 1])) {
current++;
}
tokens.push(body.substring(start - 1, current - 1));
break;
}
}
}
console.log(tokens);
return tokens;
};
//return
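
A quick check of what the new lexer produces, assuming the lexify above is in scope (the sample inputs are made up):

console.log(lexify('Book { match title "alpha" }', true));
//[ 'Book', '{', 'match', 'title', 'alpha', '}' ] - braces are single tokens, quotes are stripped

console.log(lexify('scalar String', false));
//[ 'scalar', 'String' ] - schema lexing, where string literals are rejected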