User:Superstitionfreeblog
Subleq8: My 8 bit subleq interpreter source code
I think I am done with my 8 bit subleq interpreter. I haven't implemented any input routines, but I have implemented 3 output routines.
subleq(X, -1): printc. Prints the ASCII character at address X. subleq(X, -2): printb. Prints the 8-bit signed number at address X in decimal, followed by a newline. subleq(X, -3): prints. Prints the zero-terminated string starting at address X.
Usage: subleq infile
Input file is a binary file. Each instruction is 3 bytes long. There are no opcodes in the binary file. Lower on this page, I discuss how to create these binary files using an assembler such as nasm.
The interpreter also supports shebangs. I also discuss this lower on this page.
Memory is static. 8 bit subleq can only address 128 bytes. Each memory location is probably initialized to 0, but I won't gamble on that. All 128 memory locations are allocated at the start of the interpreter, so there's no need to declare it. Just use it.
- I added code that initializes memory as all zeros at program start. Superstitionfreeblog (talk) 10:43, 14 November 2024 (UTC)
subleq(127, 127, -1)
Source Code
Compile with fasm.
~$ fasm subleq.asm subleq flat assembler version 1.73.30 (16384 kilobytes memory) 3 passes, 859 bytes. ~$ chmod 755 subleq ~$ ./subleq usage: subleq infile ~$ ./subleq hello1.sub Hello, world! ~$
; Subleq8 interpreter: FASM source, assembled directly to an ELF64 executable.
format ELF64 executable 3
entry _start

; Symbolic constants (plain text substitution).
progname equ "subleq"                   ; program name used in the usage message
membytes equ 256                        ; size in bytes of buffer0

; Register assignment used by the interpreter loop.
Sa       equ r8                         ; operand A of the current instruction
Sb       equ r9                         ; operand B of the current instruction
Sc       equ r10                        ; operand C of the current instruction
Spc      equ r12                        ; subleq program counter
memory   equ r13                        ; base address of subleq memory
segment executable readable

;-----------------------------------------------------------------------
; _start: process entry point (raw Linux syscalls, no libc).
;
; 1. Zeroes buffer0.
; 2. Takes the file name from argv[1]; prints the usage text if missing.
; 3. stat()s the file, then reads at most membytes of it into buffer0,
;    so an oversized file can never overrun the buffer.
; 4. If the image starts with "#!/", skips everything up to and
;    including the first NUL byte, so subleq address 0 is the byte
;    right after the shebang string.
; 5. Falls through into .interpreter with `memory` = base of the program.
;
; Any failing syscall, or a shebang with no NUL terminator inside the
; buffer, branches to .error.
;-----------------------------------------------------------------------
_start:
        ; Clear every memory slot so cells the file does not cover read as 0.
        mov     rcx, membytes
        xor     eax, eax
        mov     rdi, buffer0
        rep     stosb

        mov     rax, [rsp]              ; argc
        cmp     rax, 2
        jl      .usage                  ; no input file given
        mov     rax, [rsp + 16]         ; argv[1] = file to open
        mov     [fin.name], rax

        ; stat(argv[1]) into a temporary 144-byte area on the stack.
        sub     rsp, 144
        mov     rax, 4                  ; syscall 4 = stat
        mov     rdi, [fin.name]
        mov     rsi, rsp
        syscall
        test    rax, rax
        js      .error                  ; negative return = failure
        mov     rax, qword [rsp + 48]   ; file size field of the stat struct
        mov     rcx, membytes
        cmp     rax, rcx
        cmova   rax, rcx                ; never read more than buffer0 can hold
        mov     [fin.siz], rax
        add     rsp, 144                ; release the stat area

        mov     rax, 2                  ; syscall 2 = open
        mov     rdi, [fin.name]
        xor     esi, esi                ; flags 0 = read only
        syscall
        test    rax, rax
        js      .error
        mov     [fin.desc], rax

        mov     rdi, rax                ; file descriptor
        xor     eax, eax                ; syscall 0 = read
        mov     rsi, buffer0
        mov     rdx, [fin.siz]
        syscall
        mov     rbx, rax                ; keep the read result across close

        mov     rax, 3                  ; syscall 3 = close
        mov     rdi, [fin.desc]
        syscall

        test    rbx, rbx
        js      .error                  ; the read itself failed

        mov     memory, buffer0

        ; Shebang detection: compare the first three bytes with "#!/".
        mov     rax, qword [buffer0]
        mov     rsi, 0x0000000000ffffff
        and     rax, rsi
        cmp     rax, 0x002f2123
        jne     .interpreter

        ; Find the NUL that terminates the shebang string.
        xor     eax, eax
.loop2:
        cmp     rax, membytes
        jae     .error                  ; no terminator inside the buffer
        movzx   esi, byte [buffer0 + rax]
        inc     rax
        test    esi, esi
        jnz     .loop2
.plusoff:
        add     memory, rax             ; program starts right after the NUL
        ; execution continues with the interpreter that follows
;-----------------------------------------------------------------------
; Interpreter main loop.
;
; Each instruction is three sign-extended bytes A, B, C at memory + Spc.
;   Spc < 0   -> leave the interpreter (.exit)
;   A   < 0   -> input request (not implemented; instruction is skipped)
;   B   < 0   -> output request (.output)
;   otherwise -> [B] = [B] - [A]; the low byte of the difference is
;                stored. The branch test uses the full 64-bit difference
;                of the two sign-extended bytes: result <= 0 sets Spc = C,
;                result > 0 advances Spc by 3.
;-----------------------------------------------------------------------
.interpreter:
        xor     Spc, Spc                ; execution starts at address 0
.while:
        test    Spc, Spc
        js      .exit                   ; negative program counter halts

        movsx   Sa, byte [memory + Spc + 0]
        movsx   Sb, byte [memory + Spc + 1]
        movsx   Sc, byte [memory + Spc + 2]

        test    Sa, Sa
        js      .input
        test    Sb, Sb
        js      .output

        movsx   rdx, byte [memory + Sa] ; rdx = [A]
        movsx   rcx, byte [memory + Sb] ; rcx = [B]
        sub     rcx, rdx
        mov     byte [memory + Sb], cl  ; [B] = low 8 bits of the difference
        test    rcx, rcx
        jle     .jumpto                 ; result <= 0: take the branch
.increment:
        add     Spc, 3                  ; step to the next instruction
        jmp     .while
.input:
        jmp     .increment              ; input routines are not implemented
.jumpto:
        mov     Spc, Sc
        jmp     .while
;-----------------------------------------------------------------------
; Output dispatch. Entered with Sb < 0 (sign-extended B operand).
; NOT maps -1 -> 0, -2 -> 1, -3 -> 2, ... and the result indexes outfunc
; (0 = printc, 1 = printb, 2 = prints). outfunc has exactly three
; entries, so any larger index is rejected and the instruction is
; skipped, the same way the unimplemented input requests are handled.
;-----------------------------------------------------------------------
.output:
        not     Sb
        cmp     Sb, 2                   ; highest valid outfunc index
        ja      .increment              ; unknown routine: ignore it
        jmp     qword [outfunc + Sb * 8]
; printc: write the single byte stored at subleq address A to stdout,
; then continue with the next instruction.
.printc:
        lea     rsi, [memory + Sa]      ; address of the character
        mov     edx, 1                  ; length: one byte
        mov     edi, 1                  ; fd 1 = stdout
        mov     eax, 1                  ; syscall 1 = write
        syscall
        jmp     .increment
; printb: print the signed 8-bit value at subleq address A in decimal,
; followed by a newline. The text is built right to left in an 8-byte
; scratch area on the stack: newline first, then the digits of the
; absolute value, then '-' if the value is negative.
.printb:
        sub     rsp, 8                  ; scratch buffer
        lea     rsi, [rsp + 7]          ; rsi = current front of the text
        mov     byte [rsi], 0x0a        ; trailing newline
        movsx   rax, byte [memory + Sa]
        mov     rcx, rax                ; keep the signed value for the sign test
        test    rax, rax
        jns     .pb_digits
        neg     rax                     ; convert the magnitude
.pb_digits:
        mov     edi, 10                 ; divisor
.pb_next:
        xor     edx, edx                ; rdx:rax / 10
        div     rdi
        add     dl, '0'                 ; remainder -> ASCII digit
        dec     rsi
        mov     [rsi], dl
        test    rax, rax
        jnz     .pb_next
        test    rcx, rcx
        jns     .pb_write
        dec     rsi
        mov     byte [rsi], '-'
.pb_write:
        lea     rdx, [rsp + 8]          ; one past the newline
        sub     rdx, rsi                ; rdx = number of bytes to write
        mov     edi, 1                  ; fd 1 = stdout
        mov     eax, 1                  ; syscall 1 = write
        syscall
        add     rsp, 8                  ; release the scratch buffer
        jmp     .increment
; prints: write the zero-terminated string that starts at subleq
; address A to stdout (the terminator itself is not written).
.prints:
        lea     rsi, [memory + Sa]      ; rsi = first byte of the string
        mov     rdx, rsi
.ps_scan:
        cmp     byte [rdx], 0           ; walk forward to the terminator
        je      .ps_write
        inc     rdx
        jmp     .ps_scan
.ps_write:
        sub     rdx, rsi                ; rdx = string length
        mov     edi, 1                  ; fd 1 = stdout
        mov     eax, 1                  ; syscall 1 = write
        syscall
        jmp     .increment
;-----------------------------------------------------------------------
; Termination paths.
;   .error : print the error message, then continue into .usage.
;   .usage : print the usage text and terminate with exit status 1.
;   .exit  : normal end of the subleq program, exit status 0.
; Diagnostics go to stderr (fd 2) so they do not mix with the output of
; the subleq program, and both failure paths report a non-zero status
; so callers can detect them.
;-----------------------------------------------------------------------
.error:
        mov     eax, 1                  ; syscall 1 = write
        mov     edi, 2                  ; fd 2 = stderr
        movzx   edx, byte [emsg.len]
        mov     rsi, emsg
        syscall
.usage:
        mov     eax, 1                  ; syscall 1 = write
        mov     edi, 2                  ; fd 2 = stderr
        movzx   edx, byte [use.len]
        mov     rsi, use
        syscall
        mov     edi, 1                  ; exit status 1 = failure
        jmp     .quit
.exit:
        xor     edi, edi                ; exit status 0 = success
.quit:
        mov     eax, 60                 ; syscall 60 = exit
        syscall
segment readable                        ; read-only data

; Messages. Each is followed by a one-byte length (emsg.len / use.len).
emsg:   db 'An error occured!', 10
.len:   db $ - emsg

use:    db 'usage: ', progname, ' infile', 10
.len:   db $ - use

; Output routine table, indexed by NOT(B): 0 = printc, 1 = printb, 2 = prints.
outfunc:
        dq _start.printc
        dq _start.printb
        dq _start.prints
segment readable writeable              ; uninitialised data

; Bookkeeping for the input file.
fin:
.name:  dq ?                            ; pointer to the file name (argv[1])
.desc:  dq ?                            ; file descriptor returned by open
.siz:   dq ?                            ; number of bytes to read

; The loaded file image; subleq memory lives inside this buffer.
buffer0:
        rb membytes
Creating Binary Files Using Common Assemblers
My Subleq8 interpreter is coming along quite nicely. Unlike most such interpreters, mine doesn't read text files directly. It opens binary files and reads them directly into memory. So, we need a way to turn text files into binary files that my interpreter can read. My interpreter is written in x64 assembly with FASM in Linux. I also use NASM to write programs. I like both, but for different reasons. Either assembler can write binary files. Simply write a source file that is just a data segment. So, the hello subleq program on the Subleq page can be assembled with NASM like this.
; hello1.s: every 3-byte db line is one subleq instruction A, B, C.
; The interpreter computes [B] = [B] - [A] and jumps to C when the result
; is <= 0, otherwise it moves on to the next instruction. "$+3" is the
; address of the next instruction, so both outcomes continue in sequence.
; Output the character pointed to by p.
; "a" labels the A operand of the print instruction: the code is
; self-modifying and rewrites that operand before every print.
db a, a, $+3 ; [a] = 0
db p, Z, $+3 ; Z = -[p]   (Z is 0 on entry)
db Z, a, $+3 ; [a] = [p]
db Z, Z, $+3 ; Z = 0
a: db 0, -1, $+3 ; B = -1: print the character at the address held in a
; Increment p.
db m1, p, $+3 ; [p] = [p] - (-1)
; Check if p < E.
db a, a, $+3 ; [a] = 0
db E, Z, $+3 ; Z = -[E]; the cell at E holds its own address
db Z, a, $+3 ; [a] = E
db Z, Z, $+3 ; Z = 0
db p, a, -1 ; [a] = E - [p]; when <= 0 jump to -1, which stops the interpreter
db Z, Z, 0 ; 0 - 0 = 0, so this always jumps back to address 0
; Data cells.
p: db H ; pointer to the next character, starts at H
Z: db 0 ; scratch cell, kept at 0 between uses
m1: db -1 ; constant -1
; Our text "Hello, World!\n" in ASCII codes
H: db 72, 101, 108
db 108, 111, 44
db 32, 87, 111
db 114, 108, 100
db 33, 10
; End marker: one cell past the text, holding its own address.
E: db E
~$ nasm hello1.s -o hello1.sub ~$ hexdump -C hello1.sub 00000000 0c 0c 03 24 25 06 25 0c 09 25 25 0c 00 ff 0f 26 |...$%.%..%%....&| 00000010 24 12 0c 0c 15 35 25 18 25 0c 1b 25 25 1e 24 0c |$....5%.%..%%.$.| 00000020 ff 25 25 00 27 00 ff 48 65 6c 6c 6f 2c 20 57 6f |.%%.'..Hello, Wo| 00000030 72 6c 64 21 0a 35 |rld!.5| 00000036 ~$ ./subleq hello1.sub Hello, World! ~$
It's not very pretty code, but it works. However, we can make it pretty with macros.
; Same program as the plain db version, written with NASM single-line macros.
; subleq(a, b, c) emits the three operand bytes as given.
%define subleq(a, b, c) db a, b, c
; subleq(a, b) fills in C with the address of the next instruction.
%define subleq(a, b) db a, b, $+3
; dat emits raw data bytes.
%define dat db
; Output the character pointed to by p.
; "a" labels the A operand of the print instruction, which is rewritten
; before every print.
subleq(a, a) ; [a] = 0
subleq(p, Z) ; Z = -[p]   (Z is 0 on entry)
subleq(Z, a) ; [a] = [p]
subleq(Z, Z) ; Z = 0
a: subleq(0, -1) ; B = -1: print the character at the address held in a
; Increment p.
subleq(m1, p) ; [p] = [p] - (-1)
; Check if p < E.
subleq(a, a) ; [a] = 0
subleq(E, Z) ; Z = -[E]; the cell at E holds its own address
subleq(Z, a) ; [a] = E
subleq(Z, Z) ; Z = 0
subleq(p, a, -1) ; [a] = E - [p]; when <= 0 jump to -1, which stops the interpreter
subleq(Z, Z, 0) ; always jumps back to address 0
; Data cells.
p: dat H ; pointer to the next character, starts at H
Z: dat 0 ; scratch cell, kept at 0 between uses
m1: dat -1 ; constant -1
H: dat "Hello, World!", 10
; End marker: one cell past the text, holding its own address.
E: dat E
Superstitionfreeblog (talk) 07:10, 30 October 2024 (UTC)
Shebang Support
Adding shebang support to my Subleq8 interpreter shouldn't be too hard. The shebang line must be at the front of the file, so the source can look something like this:
; s(a, b, c) emits one subleq instruction with an explicit jump target.
%define s(a, b, c) db a, b, c
; s(a, b) uses the address of the next instruction as the jump target.
%define s(a, b) db a, b, $+3
; dat emits raw data bytes.
%define dat db
db "#!/usr/bin/subleq", 0 ;This line tells the shell what program to use to run this file.
org $$ - $ ;Tells the assembler to count this line as address 0
; The interpreter skips everything up to and including the 0 byte above,
; so label m below is subleq address 0.
; Output the character pointed to by p.
m: s(a, a) ; [a] = 0; "a" labels the A operand of the print instruction
s(p, Z) ; Z = -[p]
s(Z, a) ; [a] = [p]
s(Z, Z) ; Z = 0
a: s(m, -1) ; B = -1: print the character at the address held in a
...
...
...
So assemble and see what we have.
~$ nasm hello2.s -o hello2.sub ~$ hexdump -C hello2.sub 00000000 23 21 2f 75 73 72 2f 62 69 6e 2f 73 75 62 6c 65 |#!/usr/bin/suble| 00000010 71 00 0c 0c 03 24 25 06 25 0c 09 25 25 0c 00 ff |q....$%.%..%%...| 00000020 0f 26 24 12 0c 0c 15 35 25 18 25 0c 1b 25 25 1e |.&$....5%.%..%%.| 00000030 24 0c ff 25 25 00 27 00 ff 48 65 6c 6c 6f 2c 20 |$..%%.'..Hello, | 00000040 77 6f 72 6c 64 21 0a 35 |world!.5| 00000048 ~$
Now when I implement this in the interpreter, I'll need to check first to see if the shebang is there after I load the file into memory. Simply check if the first character in memory is '#', right? I don't think it's that simple. The first byte of our subleq program might be 0x23. If it is, the interpreter will check each byte after looking for the 0, then set the memory to be the next byte. That'll certainly eff our subleq program up and will definitely result in some unexpected behavior. Checking if the first two bytes are "#!" is probably better. As simple as:
mov ax, word[buffer]
cmp ax, 0x2123
je .somewhere
There's still the chance that an actual subleq program will start with 23, 21. It's a lot less likely than the first case, but we're still gambling that it won't ever happen. To make it even less likely, we can check the first four bytes but ignore the fourth byte, because we want to check the first three bytes for "#!/". If we do this we can't use relative filenames like ./subleq or ~/subleq. The path must start with /. That might be ok, though.
mov eax, dword[buffer]
and eax, 0x00ffffff
cmp eax, 0x002f2123
je .somewhere
Might be the best we can do.
Superstitionfreeblog (talk) 07:10, 30 October 2024 (UTC)
- My Subleq8 interpreter now supports she-bangs. Now, we copy the interpreter to /usr/bin so we can run it from anywhere. With the shebang in the binary file, all we have to do is make the file executable and run it like you would a stand alone executable or a shell script.
~$ cat hello2.s
; hello2.s: the hello program with a shebang prefix.
; s(a, b, c) emits one subleq instruction with an explicit jump target.
%define s(a, b, c) db a, b, c
; s(a, b) uses the address of the next instruction as the jump target.
%define s(a, b) db a, b, $+3
; dat emits raw data bytes.
%define dat db
; Shebang string; the interpreter skips it up to and including the 0 byte.
db "#!/usr/bin/subleq", 0
; Rebase addresses so that the first instruction after the string is address 0.
org $$ - $
; Output the character pointed to by p.
m: s(a, a) ; [a] = 0; "a" labels the A operand of the print instruction
s(p, Z) ; Z = -[p]   (Z is 0 on entry)
s(Z, a) ; [a] = [p]
s(Z, Z) ; Z = 0
a: s(m, -1) ; B = -1: print the character at the address held in a
; Increment p.
s(m1, p) ; [p] = [p] - (-1)
; Check if p < E.
s(a, a) ; [a] = 0
s(E, Z) ; Z = -[E]; the cell at E holds its own address
s(Z, a) ; [a] = E
s(Z, Z) ; Z = 0
s(p, a, -1) ; [a] = E - [p]; when <= 0 jump to -1, which stops the interpreter
s(Z, Z, m) ; always jumps back to m (address 0)
; Data cells.
p: dat H ; pointer to the next character, starts at H
Z: dat 0 ; scratch cell, kept at 0 between uses
m1: dat -1 ; constant -1
H: dat "Hello, world!", 10
; End marker: one cell past the text, holding its own address.
E: dat E
~$ nasm hello2.s -o hello2
~$ chmod 755 hello2
~$ ./hello2
Hello, world!
~$ file hello2
hello2: a /usr/bin/subleq script executable (binary data)
~$ hexdump -C hello2
00000000 23 21 2f 75 73 72 2f 62 69 6e 2f 73 75 62 6c 65 |#!/usr/bin/suble|
00000010 71 00 0c 0c 03 24 25 06 25 0c 09 25 25 0c 00 ff |q....$%.%..%%...|
00000020 0f 26 24 12 0c 0c 15 35 25 18 25 0c 1b 25 25 1e |.&$....5%.%..%%.|
00000030 24 0c ff 25 25 00 27 00 ff 48 65 6c 6c 6f 2c 20 |$..%%.'..Hello, |
00000040 77 6f 72 6c 64 21 0a 35 |world!.5|
00000048
~$
Superstitionfreeblog (talk) 07:30, 30 October 2024 (UTC)
Example Programs
Hello?
Using Printc
My interpreter has 3 output routines. Printc is the usual routine. It is called by trying to subtract from memory slot -1. It only prints the char pointed to by argument A. sq(message, -1). Argument C is ignored.
; hello3.s: prints a message one character at a time with printc.
; sq(a, b, c) emits one subleq instruction with an explicit jump target.
%define sq(a, b, c) db a, b, c
; sq(a, b) uses the address of the next instruction as the jump target.
%define sq(a, b) db a, b, $+3
; dat and string both emit raw data bytes.
%define dat db
%define string db
; Shebang string; the interpreter skips it up to and including the 0 byte.
shebang: string "#!/usr/bin/subleq", 0
; Rebase addresses so that "begin" is subleq address 0.
org shebang - $
; Subtract 0 from the current character: it is left unchanged, and the
; program stops (jump to -1) once the value is <= 0. The B operand of
; this instruction (at begin+1) is the character pointer.
begin: sq(zero, message, -1)
; B = -1: print the current character. The A operand (at begin+3) is the
; second copy of the character pointer.
sq(message, -1)
; Advance both pointer copies by subtracting -1 from them.
sq(neg1, begin+1)
sq(neg1, begin+3)
; 0 - 0 = 0, so this always jumps back to begin.
sq(zero, zero, begin)
message: string "Hello, Esolang!", 0x0a
; Constant 0; it sits right after the message, so it also ends the loop.
zero: dat 0
; Constant -1 used to increment the pointers.
neg1: dat -1
Output:
~$ nasm hello3.s -o hello3.sub ~$ chmod 755 hello3.sub ~$ ./hello3.sub Hello, Esolang! ~$
Using Prints
Prints prints a null terminated string. You call prints by passing -3 as argument B. This one kind of feels like cheating. You don't have to use it, though.
; hello4.s: prints a zero-terminated string with a single prints call.
; _(a, b, c) emits one subleq instruction with an explicit jump target.
%define _(a, b, c) db a, b, c
; _(a, b) uses the address of the next instruction as the jump target.
%define _(a, b) db a, b, $+3
; dat emits raw data bytes.
%define dat db
; B = -3: print the zero-terminated string that starts at message.
_(message, -3)
; [127] - [127] = 0, so the jump to -1 is taken and the interpreter stops.
_(127, 127, -1) ;Can be any memory slot from 0 to 127.
message: dat "Hello, Esolang!", 10, 0
Output:
~$ nasm hello4.s -o hello4.sub ~$ subleq hello4.sub Hello, Esolang! ~$
Fibonacci Sequence
; fib.s: prints Fibonacci numbers with printb, one per line.
; s(a, b, c) emits one subleq instruction with an explicit jump target.
%define s(a, b, c) db a, b, c
; s(a, b) uses the address of the next instruction as the jump target.
%define s(a, b) db a, b, $+3
; dat emits raw data bytes.
%define dat db
; Working cells, addressed by fixed numbers rather than labels.
%define prev 0x50
%define curr 0x51
%define index 0x52
%define swap 0x53
%define zero 0x54
%define out 0x55
; Shebang string ending in a 0 byte; the interpreter skips it.
db `#!/usr/bin/subleq\0`
; Rebase addresses so that "begin" is subleq address 0.
org $$-$
; Setup: prev = 0, curr = 1, index = -limit (the iteration count).
begin: s(zero, zero)
s(prev, prev)
s(curr, curr)
s(neg1, curr)
s(index, index)
s(limit, index)
; zero = -(prev + curr)
loop: s(prev, zero)
s(curr, zero)
; out = prev + curr, then print it (B = -2 is printb).
s(out, out)
s(zero, out)
s(out, -2)
; swap = prev + curr
s(swap, swap)
s(zero, swap)
; prev = curr (moved through zero)
s(zero, zero)
s(curr, zero)
s(prev, prev)
s(zero, prev)
; curr = swap (moved through zero)
s(zero, zero)
s(swap, zero)
s(curr, curr)
s(zero, curr)
; index = index - 1; when it reaches 0 or below, jump to -1 and stop.
s(one, index, -1)
; Clear zero and jump back to loop (0 - 0 = 0 always takes the jump).
s(zero, zero, loop)
; Constants.
one: dat 1
neg1: dat -1
limit: dat -10
Output:
~$ nasm fib.s -o fib.sub ~$ chmod 755 fib.sub ~$ ./fib.sub 1 2 3 5 8 13 21 34 55 89 ~$