User:Superstitionfreeblog

From Esolang
Jump to navigation Jump to search

Subleq8: My 8 bit subleq interpreter source code

I think I am done with my 8 bit subleq interpreter. I haven't implemented any input routines, but I have implemented 3 output routines.

subleq(X, -1): printc. Prints the ascii character at address X
subleq(X, -2): printb. Prints the value in decimal of 8 bit signed number at X and new line.
subleq(X, -3): prints. Prints zero terminated string starting at address X.

Usage: subleq infile

The input file is a binary file. Each instruction is 3 bytes long (the A, B and C operands). There are no opcodes in the binary file. Lower on this page, I discuss how to create these binary files using an assembler such as nasm.

The interpreter also supports shebangs. I also discuss this lower on this page.

Memory is static. 8 bit subleq can only address 128 bytes. Each memory location is probably initialized to 0, but I won't gamble on that. All 128 memory locations are allocated at the start of the interpreter, so there's no need to declare it. Just use it.

I added code that initializes memory as all zeros at program start. Superstitionfreeblog (talk) 10:43, 14 November 2024 (UTC)
subleq(127, 127, -1)

Source Code

Compile with fasm.

~$ fasm subleq.asm subleq
flat assembler  version 1.73.30  (16384 kilobytes memory)
3 passes, 859 bytes.
~$ chmod 755 subleq
~$ ./subleq
usage: subleq infile
~$ ./subleq hello1.sub
Hello, world!
~$ 
format ELF64 executable 3
entry _start
;=======================================================================
; Subleq8: 8 bit subleq interpreter. fasm syntax, x86-64 Linux, raw
; syscalls only (rax = number, args in rdi/rsi/rdx; rcx and r11 are
; clobbered by every syscall).
;
; usage: subleq infile
;
; The input file is loaded at buffer0. If it starts with "#!/" the
; bytes up to and including the first 0 byte are skipped, and subleq
; address 0 is the byte after that 0.
;
; Every instruction is three signed bytes A, B, C at the Program Counter:
;   A <  0            : input (not implemented), continue with PC + 3
;   B = -1 / -2 / -3  : printc / printb / prints on address A, PC + 3
;   B <  -3           : no routine defined, treated as a no-op, PC + 3
;   otherwise         : [B] = [B] - [A] (8 bit wrap-around);
;                       PC = C if the stored byte is <= 0, else PC + 3
; Execution stops when PC is negative or when the three operand bytes
; would no longer fit inside buffer0.
;
; Exit status: 0 on a normal halt, 1 after the usage or error message.
;=======================================================================
define progname "subleq"
define membytes 256
define outcount 3                    ;number of entries in the outfunc table
define Sa  r8                        ;register to use as Register A
define Sb  r9                        ;register to use as Register B
define Sc  r10                       ;register to use as Register C
define Spc r12                       ;register to use as Program Counter
define memory    r13                 ;host address of subleq address 0
define Slimit    r14                 ;highest Program Counter whose 3 bytes still fit in buffer0
segment executable readable
_start:
;-----added Nov 14, 2024-------------
    mov    rcx, membytes             ;initialize all memory slots as 0
    xor    rax, rax
    mov    rdi, buffer0
    rep stosb
;------------------------------------
    mov    rax, [rsp]                ;get argc
    cmp    rax,    2                 ;if argc is less than two
    jl     .usage                    ;print usage and exit.
    mov    rax, [rsp + 16]           ;get first commandline arg(argv[1]).File to open.
    mov    [fin.name], rax           ;save (pointer to) filename to open.
    sub    rsp,    144               ;reserve 144 bytes on the stack for struct stat.
    mov    rax,    4                 ;syscall 4 = stat. We want the file size.
    mov    rdi,    [fin.name]        ;File to get the stat on.
    mov    rsi,    rsp               ;stack pointer is where the data will go.
    syscall                          ;Save the stat struct to the stack.
    or     rax,    rax               ;set the sign flag from the return value
    js     .error                    ;if less than 0. print error and exit.
    mov    rax,    qword[rsp + 48]   ;get the file size item of the struct.
    mov    [fin.siz], rax            ;save it to memory
    add    rsp,    144               ;drop everything off the stack. Restore stack pointer.
    mov    rax,    2                 ;open file
    mov    rsi,    0                 ;readonly
    syscall                          ;open file. file name is still in rdi
    or     rax,    rax               ;set the sign flag from the return value
    js     .error                    ;if rax < 0 print error and exit
    mov    [fin.desc], rax           ;save file descriptor to memory
    mov    rdi,    rax               ;rax still has file descriptor.
    mov    rdx,    [fin.siz]         ;number of bytes in the file
    mov    rax,    membytes
    cmp    rdx,    rax
    cmova  rdx,    rax               ;never read more than buffer0 can hold
    xor    rax,    rax               ;syscall 0 = read
    mov    rsi,    buffer0           ;array to save file in.
    syscall                          ;read file directly to memory.
    or     rax,    rax               ;a failed read (e.g. infile is a directory)
    js     .error                    ;is an error; exiting closes the descriptor.
    mov    rax,    3                 ;close
    mov    rdi,    [fin.desc]        ;file descriptor
    syscall                          ;close the file. We no longer need it.
    mov    memory, buffer0
    mov    rax, qword[buffer0]       ;look at the first three bytes only
    mov    rsi, 0x0000000000ffffff
    and    rax, rsi
    cmp    rax, 0x002f2123           ;"#!/" in little endian byte order
    jne    .interpreter              ;no shebang: program starts at buffer0
    xor    rax, rax
 .loop2:                             ;skip the shebang up to and including its 0 byte
    cmp    rax, membytes
    jae    .error                    ;no terminating 0 inside buffer0
    movzx  rsi, byte[buffer0 + rax]
    inc    rax
    or     rsi, rsi
    jnz    .loop2
 .plusoff:
    add    memory, rax               ;subleq address 0 = byte after the terminating 0
 .interpreter:                       ;start of the actual interpreter.
    mov    Slimit, buffer0 + membytes - 3
    sub    Slimit, memory            ;Slimit = last PC with all 3 operands inside buffer0
    xor    Spc,    Spc               ;initialize Program Counter to 0
 .while:
    or     Spc,    Spc               ;set the sign flag from the Program Counter
    js    .exit                      ;if less than 0 exit
    cmp    Spc,    Slimit            ;signed compare: Slimit can be negative after a long shebang
    jg    .exit                      ;ran off the end of buffer0: halt instead of reading past it
    movsx  Sa, byte[memory + Spc + 0];Register A = value at Program Counter
    movsx  Sb, byte[memory + Spc + 1];Register B = value at Program Counter + 1
    movsx  Sc, byte[memory + Spc + 2];Register C = value at Program Counter + 2
    or     Sa, Sa                    ;set the sign flag from Register A
    js    .input                     ;if less than 0 goto input routines(not implemented)
    or     Sb, Sb                    ;set the sign flag from Register B
    js    .output                    ;goto output routines if < 0
    movsx  rdx, byte[memory + Sa]    ;rdx = value at Register A's address
    movsx  rcx, byte[memory + Sb]    ;rcx = value at Register B's address
    sub    rcx, rdx                  ;[B] = [B] - [A] (subtract)
    mov    byte[memory + Sb], cl     ;save the low 8 bits to the address in Register B
    test   cl, cl                    ;branch on the byte that was stored, so the jump
    jg     .increment                ;always agrees with memory even when the result wrapped
 .jumpto:
    mov    Spc, Sc                   ;Program Counter = Register C
    jmp    .while                    ;Goto beginning of program loop
 .increment:
    add    Spc, 3                    ;Add 3 to Program Counter
    jmp    .while                    ;Goto beginning of program loop
 .input:
    jmp .increment                   ;Input routines not implemented yet.
 .output:
    not    Sb                        ; this changes -1 to 0, -2 to 1, -3 to 2, etc
    cmp    Sb, outcount              ; only outcount routines exist in the table;
    jae    .increment                ; anything past it is skipped instead of jumping through garbage
    jmp    qword[outfunc + Sb * 8]   ; 0 is printc, 1 is printb, 2 is prints
 .printc:                            ;write the single byte at address A to stdout
    mov    rsi, Sa
    add    rsi, memory
    mov    rax, 1                    ;write
    mov    rdi, 1                    ;stdout
    mov    rdx, 1                    ;one byte
    syscall                          ;print the char
    jmp    .increment
 .printb:                            ;write the signed byte at address A in decimal plus newline
    mov    rdi, 10                   ;divisor for the digit loop
    sub    rsp, 8                    ;8 byte text buffer on the stack ("-128\n" needs 5)
    mov    byte[rsp + 7], 0x0a       ;the newline goes last
    mov    rsi, rsp
    add    rsi, 7                    ;rsi walks backwards from the newline
    movsx  rax, byte[memory + Sa]
    or     rax, rax
    jns   .loop0
    neg    rax                       ;digits are produced from the absolute value
 .loop0:
    dec    rsi
    xor    rdx, rdx                  ;div divides rdx:rax, so rdx must be 0
    div    rdi                       ;rax = quotient, rdx = next digit
    add    dl, '0'
    mov    [rsi], dl
    or     rax, rax
    jnz   .loop0
 .sign:
    movsx  rax, byte[memory + Sa]    ;reload the value to see whether a '-' is needed
    or     rax, rax
    jns   .pcall
    dec    rsi
    mov    byte[rsi], '-'
 .pcall:
    mov    rax, 1                    ;write
    mov    rdx, rsp
    add    rdx, 8
    sub    rdx, rsi                  ;length = end of buffer - first character
    mov    rdi, 1                    ;stdout
    syscall
    add    rsp, 8                    ;release the text buffer
    jmp   .increment
 .prints:                            ;write the zero terminated string at address A
    xor    rdx, rdx
    add    rdx, Sa                   ;rdx = index of the first character
 .loop1:
    movzx  rax, byte[memory + rdx]
    or     rax, rax
    jz    .call00                    ;stop at the terminating 0
    inc    rdx
    jmp   .loop1
 .call00:
    sub    rdx, Sa                   ;rdx = string length without the 0
    mov    rax, 1                    ;write
    mov    rdi, 1                    ;stdout
    mov    rsi, Sa
    add    rsi, memory
    syscall
    jmp   .increment
 .error:                             ;print the error message, then fall into .usage
    mov    rax, 1
    mov    rdi, 1
    movzx  rdx, byte[emsg.len]
    mov    rsi, emsg
    syscall                          ;print error message.
 .usage:
    mov    rax, 1
    mov    rdi, 1
    movzx  rdx, byte[use.len]
    mov    rsi, use
    syscall                          ;print usage message
 .fail:
    mov    rax,   60                 ;exit
    mov    rdi,   1                  ;non-zero status: nothing was run
    syscall
 .exit:
    mov    rax,   60                 ;exit
    xor    rdi,   rdi                ;status 0: the subleq program halted
    syscall

segment readable    ;rodata
emsg:      db  "An error occured!", 0x0a
 .len:     db   $ - emsg
use:       db  "usage: ", progname, " infile", 0x0a
 .len:     db   $ - use
outfunc:   dq  _start.printc, _start.printb, _start.prints    ;outcount entries, indexed by not B
segment readable writeable    ;bss
fin:
 .name:    rq  1
 .desc:    rq  1
 .siz:     rq  1
buffer0:   rb  membytes

Creating Binary Files Using Common Assemblers

My Subleq8 interpreter is coming along quite nicely. Unlike most such interpreters, mine doesn't read text files directly. It opens binary files and reads them directly into memory. So, we need a way to turn text files into binary files that my interpreter can read. My interpreter is written in x64 assembly with FASM on Linux. I also use NASM to write programs. I like both, but for different reasons. Either assembler can write binary files. Simply write a source file that is just a data segment. So, the hello subleq program on the Subleq page can be assembled with NASM like this.

; hello1.s - subleq "Hello, World!" as a flat binary (NASM).
; Every "db A, B, C" line is one subleq instruction: [B] = [B] - [A],
; then jump to C if the result is <= 0. "$+3" is the address of the
; next instruction, so those lines continue in sequence either way.

; Output the character pointed to by p.
    db   a,  a,  $+3                 ; [a] = 0
    db   p,  Z,  $+3                 ; [Z] = -[p]
    db   Z,  a,  $+3                 ; [a] = [p]  (patches operand A of the line labelled a)
    db   Z,  Z,  $+3                 ; [Z] = 0
a:  db   0, -1,  $+3                 ; B = -1: print the character at the patched address

; Increment p.
    db  m1,  p,  $+3                 ; [p] = [p] - (-1)

; Check if p < E.
    db  a,   a,  $+3                 ; [a] = 0
    db  E,   Z,  $+3                 ; [Z] = -[E]
    db  Z,   a,  $+3                 ; [a] = [E]
    db  Z,   Z,  $+3                 ; [Z] = 0
    db  p,   a,   -1                 ; [a] = [E] - [p]; when <= 0 jump to -1, which halts

    db  Z,   Z,    0                 ; [Z] = 0, always jumps back to address 0

p:  db  H                            ; address of the next character to print
Z:  db  0                            ; scratch cell, kept at 0 between uses
m1: db -1                            ; constant -1

; Our text "Hello, World!\n" in ASCII codes
H:  db   72, 101,  108
    db  108, 111,   44
    db   32,  87,  111
    db  114, 108,  100
    db   33,  10 
E:  db    E                          ; holds its own address: the first address after the text
~$ nasm hello1.s -o hello1.sub
~$ hexdump -C hello1.sub
00000000  0c 0c 03 24 25 06 25 0c  09 25 25 0c 00 ff 0f 26  |...$%.%..%%....&|
00000010  24 12 0c 0c 15 35 25 18  25 0c 1b 25 25 1e 24 0c  |$....5%.%..%%.$.|
00000020  ff 25 25 00 27 00 ff 48  65 6c 6c 6f 2c 20 57 6f  |.%%.'..Hello, Wo|
00000030  72 6c 64 21 0a 35                                 |rld!.5|
00000036
~$ ./subleq hello1.sub
Hello, World!
~$

It's not very pretty code, but it works. However, we can make it pretty with macros.

; Same program as above, written with NASM single-line macros.
; subleq(a, b, c) emits the three operand bytes; the two-argument form
; fills in "$+3" (the next instruction) as the jump target.
%define    subleq(a, b, c)    db a, b, c
%define    subleq(a, b)       db a, b, $+3
%define    dat                db

; Output the character pointed to by p.
    subleq(a,  a)                    ; [a] = 0
    subleq(p,  Z)                    ; [Z] = -[p]
    subleq(Z,  a)                    ; [a] = [p]
    subleq(Z,  Z)                    ; [Z] = 0
a:  subleq(0, -1)                    ; B = -1: print the character at the patched operand A
; Increment p.
    subleq(m1, p)                    ; [p] = [p] - (-1)
; Check if p < E.
    subleq(a,  a)                    ; [a] = 0
    subleq(E,  Z)                    ; [Z] = -[E]
    subleq(Z,  a)                    ; [a] = [E]
    subleq(Z,  Z)                    ; [Z] = 0
    subleq(p,  a, -1)                ; [a] = [E] - [p]; when <= 0 jump to -1, which halts
    subleq(Z,  Z,  0)                ; always jumps back to address 0
p:  dat  H                           ; address of the next character to print
Z:  dat  0                           ; scratch cell
m1: dat -1                           ; constant -1
H:  dat "Hello, World!", 10
E:  dat  E                           ; holds its own address: the first address after the text

Superstitionfreeblog (talk) 07:10, 30 October 2024 (UTC)

Shebang Support

Adding shebang support to my Subleq8 interpreter shouldn't be too hard. The shebang line must be at the front of the file, so the source can look something like this:

%define    s(a, b, c)    db a, b, c
%define    s(a, b)       db a, b, $+3
%define    dat                db

    db "#!/usr/bin/subleq", 0  ;This line tells the shell what program to use to run this file.
    org $$ - $                 ;Tells the assembler to count this line as address 0
; Output the character pointed to by p.
m:  s(a,  a)
    s(p,  Z)
    s(Z,  a)
    s(Z,  Z)
a:  s(m, -1)
     ...
     ...
     ...

So assemble and see what we have.

~$ nasm hello2.s -o hello2.sub
~$ hexdump -C hello2.sub
00000000  23 21 2f 75 73 72 2f 62  69 6e 2f 73 75 62 6c 65  |#!/usr/bin/suble|
00000010  71 00 0c 0c 03 24 25 06  25 0c 09 25 25 0c 00 ff  |q....$%.%..%%...|
00000020  0f 26 24 12 0c 0c 15 35  25 18 25 0c 1b 25 25 1e  |.&$....5%.%..%%.|
00000030  24 0c ff 25 25 00 27 00  ff 48 65 6c 6c 6f 2c 20  |$..%%.'..Hello, |
00000040  77 6f 72 6c 64 21 0a 35                           |world!.5|
00000048
~$ 

Now when I implement this in the interpreter, I'll need to check first to see if the shebang is there after I load the file into memory. Simply check if the first character in memory is '#', right? I don't think it's that simple. The first byte of our subleq program might be 0x23. If it is, the interpreter will scan the following bytes looking for the 0, then treat the byte after that 0 as the start of memory. That'll certainly eff our subleq program up and will definitely result in some unexpected behavior. Checking if the first two bytes are "#!" is probably better. As simple as:

; Two-byte shebang test: load the first two bytes of the file image.
mov   ax,  word[buffer]
; 0x2123 is "#!" read as a little endian word ('#' = 0x23, '!' = 0x21).
cmp   ax, 0x2123
je   .somewhere

There's still the chance that an actual subleq program will start with 23, 21. It's a lot less likely than the first case, but we're still gambling that it won't ever happen. To make it even less likely, we can check the first four bytes but ignore the fourth byte, because we want to check the first three bytes for "#!/". If we do this we can't use relative filenames like ./subleq or ~/subleq. The path must start with /. That might be ok, though.

; Three-byte shebang test: load four bytes, then ignore the fourth.
mov   eax, dword[buffer]
and   eax, 0x00ffffff                ; keep only the first three bytes
; 0x002f2123 is "#!/" in little endian order ('/' = 0x2f).
cmp   eax, 0x002f2123
je   .somewhere

Might be the best we can do.

Superstitionfreeblog (talk) 07:10, 30 October 2024 (UTC)

My Subleq8 interpreter now supports she-bangs. Now, we copy the interpreter to /usr/bin so we can run it from anywhere. With the shebang in the binary file, all we have to do is make the file executable and run it like you would a stand alone executable or a shell script.
~$ cat hello2.s
; hello2.s - "Hello, world!" with a shebang line in front of the program.
; s(a, b, c) emits one subleq instruction; the two-argument form uses
; "$+3" (the next instruction) as the jump target.
%define    s(a, b, c)    db a, b, c
%define    s(a, b)       db a, b, $+3
%define    dat                db

; The interpreter skips everything up to and including this 0 byte.
    db "#!/usr/bin/subleq", 0
; $$ - $ is minus the length of the shebang string, so the label m
; below gets address 0 (the hexdump shows the program's own addresses
; starting from 0 right after the shebang).
org $$ - $
; Output the character pointed to by p.
m:  s(a,  a)                         ; [a] = 0
    s(p,  Z)                         ; [Z] = -[p]
    s(Z,  a)                         ; [a] = [p]
    s(Z,  Z)                         ; [Z] = 0
a:  s(m, -1)                         ; operand A (initially m = 0) is patched above; B = -1 prints it
; Increment p.
    s(m1, p)                         ; [p] = [p] - (-1)
; Check if p < E.
    s(a,  a)                         ; [a] = 0
    s(E,  Z)                         ; [Z] = -[E]
    s(Z,  a)                         ; [a] = [E]
    s(Z,  Z)                         ; [Z] = 0
    s(p,  a, -1)                     ; [a] = [E] - [p]; when <= 0 jump to -1, which halts
    s(Z,  Z,  m)                     ; always jumps back to m
p:  dat    H                         ; address of the next character to print
Z:  dat    0                         ; scratch cell
m1: dat   -1                         ; constant -1
H:  dat    "Hello, world!", 10
E:  dat     E                        ; holds its own address: the first address after the text
~$ nasm hello2.s -o hello2
~$ chmod 755 hello2
~$ ./hello2
Hello, world!
~$ file hello2
hello2: a /usr/bin/subleq script executable (binary data)
~$ hexdump -C hello2
00000000  23 21 2f 75 73 72 2f 62  69 6e 2f 73 75 62 6c 65  |#!/usr/bin/suble|
00000010  71 00 0c 0c 03 24 25 06  25 0c 09 25 25 0c 00 ff  |q....$%.%..%%...|
00000020  0f 26 24 12 0c 0c 15 35  25 18 25 0c 1b 25 25 1e  |.&$....5%.%..%%.|
00000030  24 0c ff 25 25 00 27 00  ff 48 65 6c 6c 6f 2c 20  |$..%%.'..Hello, |
00000040  77 6f 72 6c 64 21 0a 35                           |world!.5|
00000048
~$ 

Superstitionfreeblog (talk) 07:30, 30 October 2024 (UTC)

Example Programs

Hello?

Using Printc

My interpreter has 3 output routines. Printc is the usual routine. It is called by trying to subtract from memory slot -1. It only prints the char pointed to by argument A. sq(message, -1). Argument C is ignored.

; hello3.s - prints a string one character at a time with printc.
; The program modifies its own operands: begin+1 is operand B of the
; first instruction and begin+3 is operand A of the second one.
%define sq(a, b, c) db a, b, c
%define sq(a, b)    db a, b, $+3
%define dat         db
%define string      db

shebang: string "#!/usr/bin/subleq", 0
; shebang - $ is minus the length of the shebang string, so that begin
; is assembled as address 0.
org shebang - $

begin:   sq(zero, message, -1)       ; subtract 0 from the current character: unchanged; halt (jump to -1) if it is <= 0
         sq(message, -1)             ; B = -1: print the current character
         sq(neg1, begin+1)           ; advance the pointer tested by the first instruction
         sq(neg1, begin+3)           ; advance the pointer printed by the second instruction
         sq(zero, zero, begin)       ; 0 - 0 = 0, so this always jumps back to begin
message: string "Hello, Esolang!", 0x0a
zero:    dat     0                   ; constant 0; it directly follows the text, so it also ends the loop
neg1:    dat    -1                   ; constant -1

Output:

~$ nasm hello3.s -o hello3.sub
~$ chmod 755 hello3.sub
~$ ./hello3.sub
Hello, Esolang!
~$ 

Using Prints

Prints prints a null terminated string. You call prints by passing -3 as argument B. This one kind of feels like cheating. You don't have to use it, though.

; hello4.s - prints a zero terminated string with a single prints call.
; _(a, b, c) emits one subleq instruction; the two-argument form uses
; "$+3" (the next instruction) as the jump target.
%define _(a, b, c) db a, b, c
%define _(a, b)    db a, b, $+3
%define dat        db

    _(message, -3)     ;B = -3: print the zero terminated string at message
    _(127, 127, -1)    ;Can be any memory slot from 0 to 127. x - x = 0, so this jumps to -1 and halts.
message: dat "Hello, Esolang!", 10, 0

Output:

~$ nasm hello4.s -o hello4.sub
~$ subleq hello4.sub
Hello, Esolang!
~$ 

Fibonacci Sequence

; fib.s - prints Fibonacci numbers with printb.
; s(a, b, c) emits one subleq instruction; the two-argument form uses
; "$+3" (the next instruction) as the jump target.
%define s(a, b, c) db a, b, c
%define s(a, b)    db a, b, $+3
%define dat        db
; Variables live at fixed addresses past the end of the assembled
; program; none of them is emitted into the file.
%define prev       0x50
%define curr       0x51
%define index      0x52
%define swap       0x53
%define zero       0x54
%define out        0x55

db `#!/usr/bin/subleq\0`
; $$-$ is minus the length of the shebang string, so begin is address 0.
org $$-$

begin:    s(zero,  zero)             ; zero  = 0
          s(prev,  prev)             ; prev  = 0
          s(curr,  curr)             ; curr  = 0
          s(neg1,  curr)             ; curr  = 1
          s(index, index)            ; index = 0
          s(limit, index)            ; index = 10 (0 - limit)
loop:     s(prev,  zero)             ; zero  = -prev
          s(curr,  zero)             ; zero  = -(prev + curr)
          s(out,   out)              ; out   = 0
          s(zero,  out)              ; out   = prev + curr
          s(out,   -2)               ; B = -2: print out in decimal followed by a newline
          s(swap,  swap)             ; swap  = 0
          s(zero,  swap)             ; swap  = prev + curr
          s(zero,  zero)             ; zero  = 0
          s(curr,  zero)             ; zero  = -curr
          s(prev,  prev)             ; prev  = 0
          s(zero,  prev)             ; prev  = old curr
          s(zero,  zero)             ; zero  = 0
          s(swap,  zero)             ; zero  = -swap
          s(curr,  curr)             ; curr  = 0
          s(zero,  curr)             ; curr  = swap (the number just printed)
          s(one,   index, -1)        ; index = index - 1; when <= 0 jump to -1, which halts
          s(zero,  zero,   loop)     ; zero  = 0, always jumps back to loop
one:      dat      1                 ; constant 1
neg1:     dat     -1                 ; constant -1
limit:    dat     -10                ; minus the number of values to print

Output:

~$ nasm fib.s -o fib.sub
~$ chmod 755 fib.sub
~$ ./fib.sub
1
2
3
5
8
13
21
34
55
89
~$