From 023cf9c8b5f25fc8604d80b0d726425164970ae3 Mon Sep 17 00:00:00 2001
From: Kayne Ruse <kayneruse@gmail.com>
Date: Sat, 31 Aug 2024 21:27:50 +1000
Subject: [PATCH] Wrote bytecode-format.txt

It's annoying that I can only work for two hours at a time
---
 .notes/SECD-concept.txt    | 79 +++++++++++++++-----------------------
 .notes/bytecode-format.txt | 71 ++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 48 deletions(-)
 create mode 100644 .notes/bytecode-format.txt

diff --git a/.notes/SECD-concept.txt b/.notes/SECD-concept.txt
index a414380..686fdf4 100644
--- a/.notes/SECD-concept.txt
+++ b/.notes/SECD-concept.txt
@@ -1,3 +1,7 @@
+This file is messy and confusing, and makes sense to nobody but me - so don't worry about understanding it too much - better docs will come later.
+
+===
+
 SECD = State, Environment, Control, Dump
 
 The idea of "Landin's SECD Machine" is to store the working memory in S, the variable-value bindings in E, the code/instructions in C, and the program stack in D.
@@ -7,10 +11,16 @@ Notes:
 
 	The environment, denoted with an E, is created on routine start, and destroyed on routine end - however, it uses the parent routine's environment as the starting point for it's creation, so closures work as expected
 
-	unlike version 1, identifiers are not a valid datatype.
+	unlike version 1, identifiers are not a valid datatype - they're just an index representing a symbol, like "standard::clock"
 
 	placeholder opcodes - EOF, PASS, ERROR,
 
+	a "value" can be of any valid datatype, and may point to various parts of memory to define its value
+
+	Symbols will be awkward... I suspect the symbol table might need to be rebuilt on startup, as the order of the modules will not necessarily be the same each time
+
+	The various instances of S could be the same array in memory, simply marked as "unused"? You could stick C on there as a value before "pushing" for a new routine
+
 Things to consider later:
 	type cast?
 	rest parameter?
@@ -28,13 +38,13 @@ ASSERT
 PRINT
 	pop S(0), and print the output
 SET
-	read one word from C, saves the key E[word] to the value S(0), popping S(0)
+	read one word from C, saves the key E[SYMBOL(word)] to the value S(0), popping S(0)
 GET
-	read one word from C, finds the value of E[word], leaves the value on S
+	read one word from C, finds the value of E[SYMBOL(word)], leaves the value on S
 DECLARE
-	read two words from C, create a new entry in E with the key E[word1], the type defined by word2, the value 'null'
+	read two words from C, create a new entry in E with the key E[SYMBOL(word1)], the type defined by word2, the value 'null'
 DEFINE
-	read two words from C, create a new entry in E with the key E[word1], the type defined by word2, the value popped from S(0)
+	read two words from C, create a new entry in E with the key E[SYMBOL(word1)], the type defined by word2, the value popped from S(0)
 
 
 //arithmetic instructions
@@ -54,7 +64,7 @@ MODULO
 COMPARE_EQUAL
 	pops S(-1) and S(0), replacing it with TRUE or FALSE, depending on equality
 COMPARE_LESS
-	pops S(-1) and S(0), replacing it with TRUE or FALSE, depending on comparisoncomparison
+	pops S(-1) and S(0), replacing it with TRUE or FALSE, depending on comparison
 COMPARE_LESS_EQUAL
 	pops S(-1) and S(0), replacing it with TRUE or FALSE, depending on comparison
 COMPARE_GREATER
@@ -76,13 +86,14 @@ INVERT
 
 //control instructions
 JUMP
-	read one value from C, and move the program counter to that location
+	read one value from C, and move the program counter to that location (relative to the current position)
 JUMP_IF_FALSE
-	read one value from C, pops S(0), and move the program counter to that location if the popped value is falsy
+	read one value from C, pops S(0), and move the program counter to that location (relative to the current position) if the popped value is falsy
 FN_CALL
-	*read a list of arguments specified in C into 'A', store (S, E, C, D) as D, wipe S and E, move the stack pointer to the specified routine, set E based on the contents of 'A'
+	*read a list of arguments specified in C into 'A', store (S, E, C, D) as D, push S, move the stack pointer to the specified routine, push a new E based on the contents of 'A'
 FN_RETURN
-	*read a list of return values specified in C into 'R', wipe S and E, restoroutine re (S, E, C, D) from D(0) popping it, store the contents of 'R' in E or S based on the next few parts of C
+This 
+	*read a list of return values specified in C into 'R', pop S, restore (S, E, C, D) from D(0) popping it, store the contents of 'R' in E or S based on the next few parts of C
 
 //bespoke utility instructions
 IMPORT
@@ -96,21 +107,21 @@ SCOPE_END
 
 ===
 
-FN_CALLonly
+FN_CALL
 	read word: read the following N arguments
 
 		for 0 to N do:
 			read word as match: # this allows literals and identifiers as arguments
 				stack: then pop S(0) into 'A'
-				**env: then read word, load E[word]*** into 'A'
+				**env: then read word, load E[SYMBOL(word)] into 'A'
 
 	read word:
-		store (S,E,C,D) as D
-		wipe S and E
-		jump C to routines[word]
+		determine where the routine is (is it new or is it a value?) and hold it for a moment
+		push E and C into a frame marker on S
+		jump C to the routine
 
 	read word:
-		read the following N parameter names, storing each member of 'A' as their value in E[name]***
+		read the following N parameter names, storing each member of 'A' as their value in E[SYMBOL(name)]
 
 	continue
 
@@ -120,21 +131,20 @@ FN_RETURN
 		for 0 to N do:
 			read word as match: # this allows literals and identifiers as arguments
 				stack: then pop S(0) into 'R'
-				**env: then read word, load E[word]*** into 'R'
+				**env: then read word, load E[SYMBOL(word)] into 'R'
 
-	restore (S,E,C,D) from D(0), popping it # this wipes S and C from the routine, and returns C to the pre-call position
+	pop E and S
+	extract and restore E and C from the frame marker on S
 
 	read word: read the following N storage locations for the values within `R`
 
 		for 0 to N do:
 			read word as match: # you're effectively reversing the prior reads
 				stack: then push from 'R' onto S
-				**env: then read word, save 'R' into E[word]***
+				**env: then read word, save 'R' into E[SYMBOL(word)]
 
 **This could work by listing the sources as e.g. "SSSExS" - three stacks and one environment variable loaded onto the stack, then one more stack for a total of four values
 
-***E[word] would more accurately be E[.data[word]], where '.data' is for the currently loaded routine
-
 Notes:
 	the bytecode of a funtion call would look like:
 	
@@ -144,30 +154,3 @@ Notes:
 
 ===
 
-.header:
-	N total length
-	N .args count
-	N .data count
-	N .routine count
-	.args start
-	.code start
-	.datatable start
-	.data start
-	.routine start
-	//any additional metadata can go here
-
-.args: # these keys stored in E before execution begins
-
-.code:
-	READ 0
-	LOAD 0
-	ASSERT
-
-.datatable: # could list the starts as a jump table, since members of data and routines have unknown sizes
-	0 -> 0x00
-
-.data:
-	"Hello world"
-
-.routines: # this stores inner routines, in sequence
-
diff --git a/.notes/bytecode-format.txt b/.notes/bytecode-format.txt
new file mode 100644
index 0000000..c7b93e8
--- /dev/null
+++ b/.notes/bytecode-format.txt
@@ -0,0 +1,71 @@
+The bytecode format
+
+===
+
+There are four components in the bytecode header:
+
+TOY_VERSION_MAJOR
+TOY_VERSION_MINOR
+TOY_VERSION_PATCH
+TOY_VERSION_BUILD
+
+The first three are each one unsigned byte, and the fourth is a null-terminated C-string.
+
+ * Under no circumstances should you ever run bytecode whose major version is different from the interpreter’s major version
+ * Under no circumstances should you ever run bytecode whose minor version is above the interpreter’s minor version
+ * You may, at your own risk, attempt to run bytecode whose patch version is different from the interpreter’s patch version
+ * You may, at your own risk, attempt to run bytecode whose build version is different from the interpreter’s build version
+
+An additional note: The contents of the build string may be anything, such as:
+
+ * the compilation date and time of the interpreter
+ * a marker identifying the current fork and/or branch
+ * identification information, such as the developer's copyright
+ * a link to Rick Astley's "Never Gonna Give You Up" on YouTube
+
+===
+
+At this time, a 'module' consists of a single 'routine', which acts as its global scope.
+
+Additional information may be added later, or multiple 'modules' listed sequentially may be a possibility.
+
+===
+
+# the routine structure, which is potentially recursive
+
+# symbol shorthand : 'module::identifier'
+# where 'module' can be omitted if it's local to this module ('identifier' within the symbols is calculated at the module level; it's always unique)
+
+.header:
+	total size         # size of this routine, including all data and subroutines
+	N .param count     # the number of parameter fields expected
+	N .data count      # the number of data fields expected
+	N .routine count   # the number of routines present
+	.param start       # absolute address of .param;     omitted if not needed
+	.code start        # absolute address of .code;      mandatory
+	.datatable start   # absolute address of .datatable; omitted if not needed
+	.data start        # absolute address of .data;      omitted if not needed
+	.routine start     # absolute address of .routine;   omitted if not needed
+	# additional metadata fields can be added later
+
+.param:
+	# a list of symbols to be used as keys in the environment
+
+.code:
+	# instructions read and 'executed' by the interpreter
+	READ 0
+	LOAD 0
+	ASSERT
+
+.datatable:
+	# a 'symbol -> pointer' jumptable for quickly looking up values in .data and .routines
+	0 -> {string, 0x00}
+	1 -> {fn, 0xFF}
+
+.data:
+	# data that can't really be embedded into .code
+	"Hello world"
+
+.routines:
+	# inner routines, each of which conforms to this spec
+