# Abc!?/Python Compiler
# Compile Abc!? programs to C
# This is a pretty crappy compiler.
# It works for valid programs, but it cannot help you very much with invalid
# programs.
# Configuration
# Line prefix that separates the data section (above it) from the code
# section (below it) of an Abc!? source file.
marker = "Abc!?"
# When True, progress information is printed while compiling and each emitted
# C statement is prefixed with a comment naming its source line.
extra_info = False
# Convert a line of Abc code to C statement(s)
def convert_op(code: str, jump_list: list) -> str:
    """Translate one Abc!? operation into C source text.

    An operation is either a jump (contains ':') or a move (contains '>'
    somewhere after its first character), optionally preceded by a condition
    written between '[' and ']'.

    Args:
        code: The operation text of one program line.
        jump_list: The labels that a jump is allowed to target.

    Returns:
        The C statement(s) for the operation, wrapped in an ``if`` when a
        condition is present.

    Exits the process with status 1 (after printing an error) when the
    brackets are unmatched or the line is neither a move nor a jump.
    """
    lbracket = code.find('[')
    rbracket = code.find(']')

    # Exactly one of the two brackets being present means it is unmatched.
    if (lbracket < 0) != (rbracket < 0):
        print("ERROR: condition has an unmatched '[' or ']'")
        exit(1)

    # A condition is recognised only when '[' comes before ']'.
    conditional = 0 <= lbracket < rbracket
    if conditional:
        condition_text = code[lbracket + 1:rbracket]
        body_text = code[rbracket + 1:]
    else:
        condition_text, body_text = '', code

    colon_at = body_text.find(':')
    if colon_at >= 0:
        # Jump op: everything after the colon is the target label.
        assert len(body_text) > colon_at
        c_statement = convert_jump(body_text[colon_at + 1:], jump_list)
    elif body_text.find('>') > 0:
        # Move op
        c_statement = convert_move(body_text)
    else:
        print("ERROR: line must be a move operation or a jump operation")
        exit(1)

    if not conditional:
        return c_statement
    return make_conditional(condition_text, c_statement)
# return (nextCode, c_condition)
def read_condition(code):
    """Parse ``operand operator operand`` and build the C comparison.

    The Abc!? operators '#' and '=' become C's '!=' and '=='; '<' and '>'
    are passed through unchanged.

    Returns:
        A pair ``(remaining_code, c_condition)``.

    Exits the process with status 1 (after printing an error) when either
    operand or the comparison operator is missing.
    """
    # NOTE(review): operands are used as written here; unlike read_left_move
    # they are not passed through convert_var -- confirm whether '?' and '!'
    # are meant to be usable inside conditions.
    code, first_unary, first_operand = read_unary_operand(code)
    if not first_operand:
        print("ERROR: condition has no operand")
        exit(1)

    code, comparison = read_conditional_operator(code)
    if not comparison:
        print("ERROR: condition has no conditional operator")
        exit(1)

    code, second_unary, second_operand = read_unary_operand(code)
    if not second_operand:
        print("ERROR: condition has no second operand")
        exit(1)

    # Convert operator
    c_comparison = {'#': '!=', '=': '=='}.get(comparison, comparison)
    lhs = unary_modify_operand(first_unary, first_operand)
    rhs = unary_modify_operand(second_unary, second_operand)
    return (code, '{} {} {}'.format(lhs, c_comparison, rhs))
def make_conditional(conditional_code, c_code):
    """Wrap ``c_code`` in a C ``if`` guarded by the parsed condition.

    Exits the process with status 1 (after printing an error) when anything
    is left over after the condition has been read.
    """
    leftover, c_condition = read_condition(conditional_code)
    if leftover:
        print("ERROR: junk at end of condition")
        exit(1)
    return "if ({cond}) {{ {body} }}".format(cond=c_condition, body=c_code)
def convert_jump(code, jump_list):
    """Return the C ``goto`` for the jump target ``code``.

    The target must appear in ``jump_list`` exactly as written; otherwise an
    error is printed and the process exits with status 1.
    """
    if code in jump_list:
        return 'goto {};'.format(to_label(code))
    print('ERROR: there is no line labeled "{}"'.format(code))
    exit(1)
def is_var_char(char):
    """Return True when the single character ``char`` names a variable.

    Variables are letters plus the two special names '!' and '?'.
    """
    assert len(char) == 1
    if char in '!?':
        return True
    return char.isalpha()
def is_hex(letter):
    """Return True when ``letter`` is one of the hexadecimal letters a-f/A-F.

    Decimal digits are not covered here; callers test those separately.
    """
    return 'abcdefABCDEF'.find(letter) >= 0
# return (nextCode, operand)
def read_operand(code):
    """Read one operand from the front of ``code``.

    Leading whitespace is skipped. Three operand forms are recognised:

    * a variable: a single variable character, returned as-is;
    * a character literal: a backslash followed by any character, returned
      as the decimal code point of that character;
    * a number literal: decimal digits, or '$' followed by hexadecimal
      digits, returned as a decimal string.

    Returns:
        A pair ``(remaining_code, operand)``. Both are '' when ``code`` is
        empty or all whitespace.

    Exits the process with status 1 (after printing an error) when the text
    does not start with an operand, or when a character literal's backslash
    is the last character of the text.
    """
    code = code.lstrip()
    if not code:
        return ('', '')

    first = code[0]
    if first == '\\':
        # Character literal. A lone trailing backslash has no character to
        # take the code of; report it instead of raising IndexError.
        if len(code) < 2:
            print("ERROR: character literal is missing its character")
            exit(1)
        return (code[2:], str(ord(code[1])))

    if is_var_char(first):
        # Variable
        return (code[1:], first)

    # Number literal
    base = 10
    start = 0
    if first == '$':
        # Hexadecimal
        base = 16
        start = 1
    end = start
    while end < len(code) and (
            code[end].isdigit() or (base == 16 and is_hex(code[end]))):
        end += 1
    if end == start:
        # No digits at all (or a bare '$').
        print("ERROR: expected operand")
        exit(1)
    return (code[end:], str(int(code[start:end], base)))
# return (nextCode, operator)
def read_operator(code):
    """Read an optional binary operator (one of ``+-/*&|``).

    Leading whitespace is always consumed. The operator is '' when the next
    character is not a binary operator.
    """
    stripped = code.lstrip()
    head = stripped[:1]
    if head == '' or head not in '+-/*&|':
        return (stripped, '')
    return (stripped[1:], head)
# return (nextCode, operator)
def read_conditional_operator(code):
    """Read an optional comparison operator (one of ``=#<>``).

    Leading whitespace is always consumed. The operator is '' when the next
    character is not a comparison operator.
    """
    stripped = code.lstrip()
    head = stripped[:1]
    if head == '' or head not in '=#<>':
        return (stripped, '')
    return (stripped[1:], head)
# return (nextCode, operator)
def read_unary_operator(code):
    """Read an optional unary operator ('*' or '~').

    Leading whitespace is always consumed. The operator is '' when the next
    character is not a unary operator.
    """
    stripped = code.lstrip()
    head = stripped[:1]
    if head == '' or head not in '*~':
        return (stripped, '')
    return (stripped[1:], head)
# return (nextCode, unaryOp, operand)
def read_unary_operand(code):
    """Read an optional unary operator followed by an operand.

    Returns:
        A triple ``(remaining_code, unary_operator, operand)``; the unary
        operator is '' when none was written.
    """
    after_unary, prefix = read_unary_operator(code)
    rest, operand = read_operand(after_unary)
    return (rest, prefix, operand)
# Apply a unary operator to an operand
# Purpose: turn the DEREFERENCE operator into an access to the Mem array
def unary_modify_operand(unary_operator, operand):
    """Return the C expression for ``operand`` with ``unary_operator`` applied.

    '*' becomes an untyped dereference at offset ``operand`` into ``Mem``;
    any other operator (including the empty string) is simply prefixed.
    """
    if unary_operator != '*':
        return unary_operator + operand
    return '*(Mem + {})'.format(operand)
# Returns a pair of (pre_list, c_expression)
def read_left_move(code):
    """Parse the source (left) side of a move operation.

    The left side is one operand, optionally followed by a binary operator
    and a second operand; each operand may carry a unary operator.

    Returns:
        A pair ``(pre, expression)``. ``pre`` is a list of C statements that
        must run before the assignment, and ``expression`` is the C
        expression for the value being moved.

    Exits the process with status 1 (after printing an error) when an
    operand is missing or when unparsed text remains.
    """
    code, unary_1, op1 = read_unary_operand(code.lstrip())
    if not op1:
        # Without this check convert_var('') fails its assertion below.
        print("ERROR: missing an operand on the left side")
        exit(1)

    code, operator = read_operator(code)
    unary_2, op2 = None, None
    if operator:
        code, unary_2, op2 = read_unary_operand(code)
        if not op2:
            print("ERROR: missing an operand after '{}' on the left side"
                  .format(operator))
            exit(1)

    if code.lstrip():
        print("ERROR: junk at the end of the left side of the operation: \"", code, "\"")
        exit(1)

    result = unary_modify_operand(unary_1, convert_var(op1))
    if operator:
        mod_op2 = unary_modify_operand(unary_2, convert_var(op2))
        result = '{} {} {}'.format(result, operator, mod_op2)

    pre = []
    if '?' in (op1, op2):
        # Reading '?' fetches a character of input first.
        pre.append('In = getchar()')
    if '!' in (op1, op2):
        # NOTE(review): neither _R_ nor abcRand() is declared by the C
        # prologue emitted in this file, and convert_var maps '!' to Out,
        # not _R_ -- confirm the intended semantics of reading '!'.
        pre.append('_R_ = abcRand()')
    return (pre, result)
def convert_var(var_char):
    """Map an operand to its C spelling.

    '?' becomes ``In`` and '!' becomes ``Out``; any other single-character
    variable, and any all-digit number, is returned unchanged.
    """
    assert var_char.isdigit() or len(var_char) == 1
    if var_char == '?':
        return 'In'
    if var_char == '!':
        return 'Out'
    return var_char
def read_char(code, x):
    """Consume ``x`` from the front of ``code`` when it is the first character.

    Returns:
        ``(code_without_first_char, x)`` on a match, else ``(code, '')``.
    """
    if not code or code[0] != x:
        return (code, '')
    return (code[1:], x)
# return (right_result, post_list)
def read_right_move(code):
    """Parse the destination (right) side of a move operation.

    A leading '>' marks the destination as a dereference into ``Mem``.

    Returns:
        A pair ``(destination, post_list)``. ``destination`` is the C
        expression being assigned to, and ``post_list`` is a list of C
        statements that must run after the assignment.

    Exits the process with status 1 (after printing an error) when the
    operand is missing or when text remains after it.
    """
    code, deref = read_char(code.lstrip(), '>')
    if not code:
        print("ERROR: missing an operand on the right side")
        exit(1)

    code, op = read_operand(code)
    if code.strip():
        print("ERROR: junk at end of line")
        exit(1)

    # Writing to '?' ends the program; writing to '!' prints a character.
    follow_ups = {'?': 'goto Abc_Exit', '!': 'putchar(Out)'}
    post_list = [follow_ups[op]] if op in follow_ups else []

    destination = convert_var(op)
    if deref:
        destination = unary_modify_operand('*', destination)
    return (destination, post_list)
def convert_move(code):
    """Translate a move operation (``source > destination``) into C.

    The text is split at the first '>'. The result is the assignment,
    preceded and followed by any extra statements that the two sides
    require, joined with '; ' and terminated by ';'.
    """
    arrow = code.find('>')
    assert 0 < arrow < len(code) - 1

    pre_list, source_expr = read_left_move(code[:arrow])
    dest_expr, post_list = read_right_move(code[arrow + 1:])

    statements = list(pre_list)
    statements.append(dest_expr + ' = ' + source_expr)
    statements.extend(post_list)
    return '; '.join(statements) + ';'
# Convert a string to a C label
def to_label(text):
    """Encode arbitrary label text as a unique, valid C identifier.

    ASCII letters and digits are copied unchanged. Every other character,
    including '_' and non-ASCII letters, is written as ``_<code point>_``.
    Escaping '_' itself keeps the encoding unambiguous, so two different
    label texts can never produce the same C label (for example 'a!' and
    'a_33_' no longer collide).

    Returns:
        The identifier, always prefixed with ``Abc_Label_``.
    """
    pieces = ['Abc_Label_']
    for c in text:
        if ord(c) < 128 and c.isalnum():
            pieces.append(c)
        else:
            pieces.append('_{}_'.format(ord(c)))
    return ''.join(pieces)
# Convert string to a list of numbers
def make_numbers(text):
    """Render a byte sequence as a comma-separated list of decimal numbers.

    A backslash introduces an escape: ``\\0``, ``\\t``, ``\\n`` and ``\\b``
    yield NUL, tab, newline and backspace, and ``\\\\`` yields a literal
    backslash (previously there was no way to produce one). A backslash
    followed by any other byte produces nothing, as before, and a trailing
    lone backslash is ignored.

    Args:
        text: An iterable of integer byte values (``bytes`` or ``bytearray``).

    Returns:
        The numbers joined with ',' -- an empty string for empty input.
    """
    escapes = {
        ord('0'): ord('\0'),
        ord('t'): ord('\t'),
        ord('n'): ord('\n'),
        ord('b'): ord('\b'),
        ord('\\'): ord('\\'),
    }
    numbers = []
    escape = False
    for byte in text:
        if escape:
            escape = False
            if byte in escapes:
                numbers.append(escapes[byte])
            # Unknown escapes are dropped together with their backslash.
        elif byte == ord('\\'):
            escape = True
        else:
            numbers.append(byte)
    return ','.join(str(x) for x in numbers)
def abc_compile(in_filename, out_filename, marker, extra_info):
    """Compile the Abc!? program in ``in_filename`` to C in ``out_filename``.

    Lines before the first line starting with ``marker`` form the data
    section and become the initial contents of the C array ``Mem``. Lines
    after it are code lines of the form ``label;operation``; lines that do
    not contain exactly one ';' are ignored.

    Args:
        in_filename: Path of the Abc!? source file.
        out_filename: Path of the C file to write.
        marker: Line prefix separating data from code.
        extra_info: When true, print progress and annotate the C output with
            source line numbers.

    Exits the process with status 1 (after printing an error) when the
    marker is missing, a label is duplicated, or an operation is invalid.
    """
    with open(in_filename, "r") as in_file:
        in_lines = in_file.readlines()

    # Locate the marker by its real length rather than a hard-coded 5.
    marker_line_index = None
    for line_i, line in enumerate(in_lines):
        if line.startswith(marker):
            marker_line_index = line_i
            break
    if marker_line_index is None:
        print("ERROR: no", marker, "marker found")
        exit(1)
    elif extra_info:
        print('Found', marker, 'on line', marker_line_index + 1)

    data_lines = in_lines[:marker_line_index]
    code_lines = in_lines[marker_line_index + 1:]
    # 1-based file line number of the first code line: the marker sits on
    # line marker_line_index + 1 and the code starts on the line after it.
    first_code_line = marker_line_index + 2

    # Data should be one byte array
    data_string = bytearray(''.join(data_lines), 'ascii')

    # Code should have comments and code
    # NOTE(review): a line with more than one ';' is treated like a line
    # with none and silently skipped -- confirm that this is intended.
    code_lines = [line.rstrip().split(';') for line in code_lines]
    jump_list = [x[0] if len(x) == 2 else '' for x in code_lines]
    operation_list = [x[1] if len(x) == 2 else '' for x in code_lines]

    # Check for duplicate line labels; the first repeated label is reported.
    first_seen = {}
    for i, text in enumerate(jump_list):
        if not text:
            continue
        if text in first_seen:
            line1 = first_code_line + first_seen[text]
            line2 = first_code_line + i
            print("ERROR: duplicate line label: \"", text, "\"", sep='')
            print(" found on lines #{} and #{}".format(line1, line2))
            exit(1)
        first_seen[text] = i

    # Only non-empty labels are valid jump targets; unlabeled lines get no
    # C label at all (see below).
    known_labels = [text for text in jump_list if text]

    # Debug print
    if extra_info:
        print('code', code_lines)
        print('data', data_string)
        print('---')

    # NOTE: this will partially write to the output file if there is an error
    with open(out_filename, "w+") as out_file:
        def emit(*args, **kwargs):
            print(*args, **kwargs, file=out_file)

        # Prologue
        emit('#include <stdio.h>')
        emit('#include <stdint.h>')
        emit('static int8_t Mem[] = {{ {} }};'.format(make_numbers(data_string)))
        emit('int main() {')
        emit(' int8_t In,Out,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z;')
        emit(' int64_t A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z;')

        # Convert each line of code
        for i, code in enumerate(operation_list):
            line_num = first_code_line + i
            if extra_info:
                print("processing line #{}".format(line_num))
            comment = jump_list[i]
            if not comment and not code:
                continue
            # Check that there is some text where the operation should be
            if not code:
                print("ERROR: program line #{} has no code".format(line_num))
                continue
            code = code.strip()
            try:
                c_code = convert_op(code, known_labels)
            except Exception:
                print("error occurred on line #{}".format(line_num))
                raise
            # Emitting the same empty-label C label for every unlabeled line
            # would define it more than once, which C rejects.
            label_prefix = to_label(comment) + ': ' if comment else ''
            if extra_info:
                emit('/* Line #', line_num, ' */ ', label_prefix, c_code, sep='')
            else:
                emit(label_prefix, c_code, sep='')

        # Epilogue
        emit('Abc_Exit: return In;')
        emit('}')
def main():
    """Command-line entry point: ``<script> infile outfile``.

    Prints a usage error and exits with status 1 unless exactly two
    arguments are given; otherwise compiles ``infile`` to ``outfile`` using
    the module-level ``marker`` and ``extra_info`` settings.
    """
    import sys
    argv = sys.argv
    if len(argv) != 3:
        print("ERROR: usage: {} infile outfile".format(argv[0]))
        exit(1)
    _, in_filename, out_filename = argv
    abc_compile(in_filename, out_filename, marker, extra_info)


if __name__ == '__main__':
    main()