Abc!?/Python Compiler

From Esolang
Jump to navigation Jump to search
Back to Abc!?
# Compile Abc!? programs to C
# This is a pretty crappy compiler.
# It works for valid programs, but it cannot help you very much with invalid
# programs.

# Config
# Prefix of the line that separates the data section (everything before it)
# from the code section (everything after it) in an Abc!? source file.
marker = 'Abc!?'
# When True, the compiler prints debug output while it runs and annotates
# each generated C statement with its source line number.
extra_info = False
#extra_info = True


# Convert a line of Abc code to C statement(s)
def convert_op(code: str, jump_list: list) -> str:
    """Translate one Abc!? operation into C source text.

    ``code`` may carry a bracketed condition (``[...]``); the text after the
    closing bracket is the operation itself and any text before the opening
    bracket is discarded.  An operation containing ``:`` is a jump whose
    target label is everything after the first ``:``; otherwise it must be a
    move containing ``>`` somewhere after its first character.

    ``jump_list`` holds the known line labels and is used to validate jump
    targets.  Returns the C statement(s) as a string.  On malformed input an
    error is printed and the process exits with status 1.
    """
    lb = code.find('[')
    rb = code.find(']')

    # Exactly one of the two brackets being present means they are unmatched.
    if (lb >= 0) != (rb >= 0):
        print("ERROR: condition has an unmatched '[' or ']'")
        exit(1)

    # Here either both brackets are present or neither is.
    conditional = 0 <= lb < rb
    if conditional:
        condition_text = code[lb + 1:rb]
        body = code[rb + 1:]
    else:
        condition_text, body = '', code

    colon = body.find(':')
    if colon >= 0:
        # Jump op
        statement = convert_jump(body[colon + 1:], jump_list)
    elif body.find('>') > 0:
        # Move op
        statement = convert_move(body)
    else:
        print("ERROR: line must be a move operation or a jump operation")
        exit(1)

    if not conditional:
        return statement
    return make_conditional(condition_text, statement)


# return (nextCode, c_condition)
def read_condition(code):
    """Parse ``operand comparison operand`` from the front of ``code``.

    Returns ``(remaining_code, c_condition)`` where ``c_condition`` is the C
    comparison expression.  The Abc!? comparison ``#`` becomes ``!=`` and
    ``=`` becomes ``==``; ``<`` and ``>`` are passed through.  Prints an
    error and exits with status 1 when any of the three parts is missing.
    """
    # NOTE(review): operands are not passed through convert_var here, so a
    # '?' or '!' operand is emitted verbatim -- confirm whether that is
    # intended.
    code, lhs_unary, lhs = read_unary_operand(code)
    if not lhs:
        print("ERROR: condition has no operand")
        exit(1)

    code, comparison = read_conditional_operator(code)
    if not comparison:
        print("ERROR: condition has no conditional operator")
        exit(1)

    code, rhs_unary, rhs = read_unary_operand(code)
    if not rhs:
        print("ERROR: condition has no second operand")
        exit(1)

    # Map the Abc comparison onto its C spelling.
    c_comparison = {'#': '!=', '=': '=='}.get(comparison, comparison)

    left = unary_modify_operand(lhs_unary, lhs)
    right = unary_modify_operand(rhs_unary, rhs)
    return (code, '{} {} {}'.format(left, c_comparison, right))


def make_conditional(conditional_code, c_code):
    """Wrap ``c_code`` in a C ``if`` guarded by the parsed condition.

    ``conditional_code`` is the text between the brackets of a conditional
    operation.  Anything left over after the condition has been parsed is
    reported as an error and the process exits with status 1.
    """
    leftover, c_condition = read_condition(conditional_code)
    if not leftover:
        return "if (" + c_condition + ") { " + c_code + " }"
    print("ERROR: junk at end of condition")
    exit(1)


def convert_jump(code, jump_list):
    """Return a C ``goto`` statement for the line labeled ``code``.

    ``jump_list`` is the list of known line labels.  If ``code`` is not one
    of them an error is printed and the process exits with status 1.
    """
    if code in jump_list:
        return 'goto ' + to_label(code) + ';'
    print('ERROR: there is no line labeled "{}"'.format(code))
    exit(1)


def is_var_char(char):
    """Return True if ``char`` names an Abc!? variable.

    Variables are the ASCII letters plus the special names ``!`` and ``?``.
    Only ASCII letters are accepted because the variable name is emitted
    verbatim as a C identifier and the generated program declares just
    ``a``-``z`` and ``A``-``Z``; ``str.isalpha`` would also accept letters
    such as 'é' and produce C that references an undeclared variable.

    ``char`` must be a string of length 1.
    """
    assert(len(char) == 1)
    is_ascii_letter = ('a' <= char <= 'z') or ('A' <= char <= 'Z')
    return is_ascii_letter or char in '!?'


def is_hex(letter):
    """Return True if ``letter`` is one of the hexadecimal letters a-f / A-F.

    Decimal digits are not covered here; callers test those separately.
    """
    return 'abcdefABCDEF'.find(letter) >= 0


# return (nextCode, operand)
def read_operand(code):
    """Read one operand from the front of ``code``.

    Leading whitespace is skipped.  Returns ``(remaining_code, operand)``:

    * empty input yields ``('', '')``;
    * a backslash followed by a character is a character literal and yields
      the character's code point as a decimal string;
    * a variable character yields that character;
    * otherwise a number literal is expected, either decimal digits or ``$``
      followed by hexadecimal digits, and its value is returned as a decimal
      string.

    Prints an error and exits with status 1 when no operand can be read,
    including a backslash with nothing after it (which previously crashed
    with an IndexError).
    """
    code = code.lstrip()
    if not code:
        return ('', '')

    first = code[0]

    # Character literal.  A backslash is never a variable character, so it
    # is safe to test for it before the variable case.
    if first == '\\':
        if len(code) < 2:
            print("ERROR: character literal is missing its character")
            exit(1)
        return (code[2:], str(ord(code[1])))

    # Variable
    if is_var_char(first):
        return (code[1:], first)

    # Number literal; a leading '$' selects hexadecimal.
    base = 16 if first == '$' else 10
    start = 1 if base == 16 else 0
    end = start
    while end < len(code) and (code[end].isdigit()
                               or (base == 16 and is_hex(code[end]))):
        end += 1
    if end == start:
        # No digits at all (or a bare '$').
        print("ERROR: expected operand")
        exit(1)
    return (code[end:], str(int(code[start:end], base)))


# return (nextCode, operator)
def read_operator(code):
    """Read an optional binary operator (one of ``+-/*&|``).

    Leading whitespace is skipped.  Returns ``(remaining_code, operator)``;
    ``operator`` is ``''`` when the next character is not an operator.
    """
    stripped = code.lstrip()
    head = stripped[:1]
    if head and head in '+-/*&|':
        return (stripped[1:], head)
    return (stripped, '')


# return (nextCode, operator)
def read_conditional_operator(code):
    """Read an optional comparison operator (one of ``=#<>``).

    Leading whitespace is skipped.  Returns ``(remaining_code, operator)``;
    ``operator`` is ``''`` when the next character is not a comparison.
    """
    text = code.lstrip()
    if text.startswith(('=', '#', '<', '>')):
        return (text[1:], text[0])
    return (text, '')

# return (nextCode, operator)
def read_unary_operator(code):
    """Read an optional unary operator (``*`` or ``~``).

    Leading whitespace is skipped.  Returns ``(remaining_code, operator)``;
    ``operator`` is ``''`` when the next character is not a unary operator.
    """
    remaining = code.lstrip()
    symbol = remaining[0] if remaining and remaining[0] in '*~' else ''
    # Slicing by len(symbol) drops the operator only when one was found.
    return (remaining[len(symbol):], symbol)


# return (nextCode, unaryOp, operand)
def read_unary_operand(code):
    """Read an operand together with its optional unary prefix.

    Returns ``(remaining_code, unary_operator, operand)``; the unary
    operator is ``''`` when none is present.
    """
    after_unary, unary = read_unary_operator(code)
    rest, operand = read_operand(after_unary)
    return (rest, unary, operand)


# Apply a unary operator to an operand
# Purpose: turn a DEREFERENCE into an access of the Mem array
def unary_modify_operand(unary_operator, operand):
    """Return the C expression for ``operand`` with ``unary_operator`` applied.

    A dereference (``*``) is rewritten as an access into the ``Mem`` array at
    offset ``operand``.  Any other operator, including the empty string, is
    simply prefixed to the operand.
    """
    if unary_operator != '*':
        return unary_operator + operand
    return '*(Mem + {})'.format(operand)


# Returns a pair (pre, result): C statements to run first, and the C expression
def read_left_move(code):
    """Parse the left (source) side of a move operation.

    The left side is one operand, optionally followed by a binary operator
    and a second operand; each operand may carry a unary prefix.

    Returns ``(pre, result)`` where ``pre`` is a list of C statements that
    must run before the assignment and ``result`` is the C expression for
    the value being moved.

    Prints an error and exits with status 1 when an operand is missing or
    when unparsed text remains.  A missing operand used to surface as an
    AssertionError from convert_var.
    """
    code = code.lstrip()

    code, unary_1, op1 = read_unary_operand(code)
    if not op1:
        print("ERROR: missing an operand on the left side")
        exit(1)

    code, operator = read_operator(code)
    unary_2, op2 = None, None
    if operator:
        code, unary_2, op2 = read_unary_operand(code)
        if not op2:
            print("ERROR: missing an operand after '{}' on the left side"
                  .format(operator))
            exit(1)

    if code.lstrip():
        print("ERROR: junk at the end of the left side of the operation: \"", code, "\"")
        exit(1)

    c_op1 = convert_var(op1)
    if operator:
        c_op2 = convert_var(op2)
        mod_op1 = unary_modify_operand(unary_1, c_op1)
        mod_op2 = unary_modify_operand(unary_2, c_op2)
        result = '{} {} {}'.format(mod_op1, operator, mod_op2)
    else:
        result = unary_modify_operand(unary_1, c_op1)

    # Reading '?' or '!' needs a statement to run before the assignment.
    pre = []
    if '?' in (op1, op2):
        pre.append('In = getchar()')
    if '!' in (op1, op2):
        # NOTE(review): the C prologue emitted by abc_compile declares
        # neither _R_ nor abcRand, and '!' is read back as 'Out' -- confirm
        # what reading '!' is supposed to generate.
        pre.append('_R_ = abcRand()')

    return (pre, result)


def convert_var(var_char):
    """Map an Abc!? operand onto the name used for it in the generated C.

    ``?`` becomes ``In`` and ``!`` becomes ``Out``; every other operand is
    returned unchanged.  ``var_char`` must be a single character or a string
    of digits.
    """
    assert(var_char.isdigit() or len(var_char) == 1)
    if var_char == '?':
        return 'In'
    if var_char == '!':
        return 'Out'
    return var_char


def read_char(code, x):
    """Consume the character ``x`` from the front of ``code`` if present.

    Returns ``(remaining_code, x)`` when ``code`` starts with ``x`` and
    ``(code, '')`` otherwise.  Whitespace is not skipped.
    """
    matched = bool(code) and code[0] == x
    return (code[1:], x) if matched else (code, '')


# return (right_result, post_list)
def read_right_move(code):
    """Parse the right (destination) side of a move operation.

    The right side is a single operand, optionally preceded by ``>`` to
    store through the ``Mem`` array instead of into the operand itself.

    Returns ``(result, post_list)`` where ``result`` is the C expression
    being assigned to and ``post_list`` holds C statements to run after the
    assignment: writing to ``?`` jumps to the exit label and writing to
    ``!`` outputs a character.

    Prints an error and exits with status 1 when the operand is missing
    (including a whitespace-only right side, which used to raise an
    AssertionError), when trailing text remains, or when the destination is
    a literal that is not dereferenced (which would generate an assignment
    to a constant in C).
    """
    code = code.lstrip()
    code, deref = read_char(code, '>')
    code, op = read_operand(code)
    if not op:
        print("ERROR: missing an operand on the right side")
        exit(1)

    if code.strip():
        print("ERROR: junk at end of line")
        exit(1)

    # Literals come back from read_operand as decimal strings; variables
    # are never digits.
    if op.isdigit() and not deref:
        print("ERROR: cannot assign to a literal on the right side")
        exit(1)

    post_list = []
    if op == '?':
        post_list.append('goto Abc_Exit')
    elif op == '!':
        post_list.append('putchar(Out)')

    result = convert_var(op)
    if deref:
        result = unary_modify_operand('*', result)

    return (result, post_list)


def convert_move(code):
    """Translate a move operation (``source > destination``) into C.

    The first ``>`` in ``code`` separates the source expression from the
    destination.  Returns one string holding any statements that must run
    before the assignment, the assignment itself and any statements that
    must run after it, separated by ``; `` and ending with ``;``.

    The caller must ensure that ``code`` contains ``>`` after its first
    character.  An empty right side is deliberately not asserted against
    here so that read_right_move can report it as a missing operand instead
    of the compile aborting with an AssertionError.
    """
    move_index = code.find('>')
    assert(move_index > 0)

    pre_list, left_part = read_left_move(code[:move_index])
    right_part, post_list = read_right_move(code[move_index + 1:])

    statements = pre_list + [right_part + ' = ' + left_part] + post_list
    return '; '.join(statements) + ';'


# Convert a string to a C label
def to_label(text):
    """Convert arbitrary label text into a unique, valid C label.

    ASCII letters and digits are kept as they are.  Every other character,
    including ``_`` and non-ASCII letters, is replaced by ``_<code point>_``.
    Escaping the underscore as well keeps the mapping one-to-one; otherwise
    the labels "a b" and "a_32_b" would both become the same C label.

    The result always starts with ``Abc_Label_``.
    """
    pieces = ['Abc_Label_']
    for c in text:
        if c.isalnum() and ord(c) < 128:
            pieces.append(c)
        else:
            pieces.append('_{}_'.format(ord(c)))
    return ''.join(pieces)


# Convert string to a list of numbers
def make_numbers(text):
    """Convert data bytes into a comma-separated list of decimal numbers.

    ``text`` is an iterable of integer byte values (for example a
    ``bytearray``).  A backslash starts an escape sequence: ``\\0``, ``\\t``,
    ``\\n`` and ``\\b`` produce NUL, tab, newline and backspace, and ``\\\\``
    produces a single backslash.  The escaped backslash used to be dropped,
    which made it impossible to put a backslash into the data.  Any other
    escaped byte, and a trailing lone backslash, is still dropped.

    Returns the numbers joined with ``,``, or ``''`` for empty input.
    """
    escapes = {
        ord('0'): ord('\0'),
        ord('t'): ord('\t'),
        ord('n'): ord('\n'),
        ord('b'): ord('\b'),
        ord('\\'): ord('\\'),
    }
    numbers = []
    escape = False
    for byte in text:
        if escape:
            escape = False
            if byte in escapes:
                numbers.append(escapes[byte])
        elif byte == ord('\\'):
            escape = True
        else:
            numbers.append(byte)
    return ','.join(str(x) for x in numbers)


def abc_compile(in_filename, out_filename, marker, extra_info):
    """Compile the Abc!? program in ``in_filename`` to C in ``out_filename``.

    Lines before the first line starting with ``marker`` form the data
    section, which becomes the initial contents of the C ``Mem`` array.
    Lines after it are code: a line made of exactly two ``;``-separated
    parts is ``label;operation``, and every other line is ignored.

    ``extra_info`` turns on debug output and annotates each generated C
    statement with its source line number.

    Prints an error and exits with status 1 if the marker is missing, if a
    label is used twice, or if an operation cannot be translated.

    NOTE: the output file may be left partially written if an error occurs
    while the code lines are being converted.
    """
    with open(in_filename, "r") as in_file:
        in_lines = in_file.readlines()

    data_lines = []
    code_lines = []
    marker_line_index = None
    for line_i, line in enumerate(in_lines):
        if marker_line_index is None:
            # startswith works for a marker of any length.
            if line.startswith(marker):
                marker_line_index = line_i
            else:
                data_lines.append(line)
        else:
            code_lines.append(line)

    if marker_line_index is None:
        print("ERROR: no", marker, "marker found")
        exit(1)
    elif extra_info:
        print('Found', marker, 'on line', marker_line_index + 1)

    # 1-based number of the first code line in the input file.  The marker
    # is line marker_line_index + 1, so the code starts one line below it.
    # Adding len(data_lines) as well would count the data lines twice.
    first_code_line = marker_line_index + 2

    # Data should be one byte array
    data_string = bytearray(''.join(data_lines), 'ascii')

    # Code should have comments and code
    code_lines = [line.rstrip().split(';') for line in code_lines]
    jump_list = [x[0] if len(x) == 2 else '' for x in code_lines]
    operation_list = [x[1] if len(x) == 2 else '' for x in code_lines]

    # Check for duplicate line labels; unlabeled lines are skipped.
    first_seen = {}
    for i, label in enumerate(jump_list):
        if not label:
            continue
        if label in first_seen:
            line1 = first_code_line + first_seen[label]
            line2 = first_code_line + i
            print("ERROR: duplicate line label: \"", label, "\"", sep='')
            print("       found on lines #{} and #{}".format(line1, line2))
            exit(1)
        first_seen[label] = i

    # Debug print
    if extra_info:
        print('code', code_lines)
        print('data', data_string)
        print('---')

    # NOTE: this will partially write to the output file if there is an error
    with open(out_filename, "w") as out_file:
        def emit(*args, **kwargs):
            print(*args, **kwargs, file=out_file)
        # Prologue
        emit('#include <stdio.h>')
        emit('#include <stdint.h>')
        emit('static int8_t Mem[] = {{ {} }};'.format(make_numbers(data_string)))
        emit('int main() {')
        emit('    int8_t In,Out,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z;')
        emit('    int64_t A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z;')
        # Convert each line of code
        for i, code in enumerate(operation_list):
            line_num = first_code_line + i
            if extra_info:
                print("processing line #{}".format(line_num))
            comment = jump_list[i]
            if not comment and not code:
                continue
            # Check that there is some text where the operation should be
            if not code:
                print("ERROR: program line #{} has no code".format(line_num))
                continue
            code = code.strip()
            try:
                c_code = convert_op(code, jump_list)
            except Exception:
                print("error occurred on line #{}".format(line_num))
                raise
            # Only labeled lines get a C label.  Giving every unlabeled line
            # the label of the empty string would define that C label more
            # than once.
            prefix = to_label(comment) + ': ' if comment else ''
            if extra_info:
                emit('/* Line #', line_num, ' */ ', prefix, c_code, sep='')
            else:
                emit(prefix, c_code, sep='')
        # Epilogue
        emit('Abc_Exit: return In;')
        emit('}')


def main():
    """Command-line entry point: ``<script> infile outfile``.

    Compiles ``infile`` to ``outfile`` using the module-level ``marker`` and
    ``extra_info`` settings.  With any other number of arguments a usage
    message is printed and the process exits with status 1.
    """
    import sys
    args = sys.argv
    if len(args) == 3:
        _, in_filename, out_filename = args
        abc_compile(in_filename, out_filename, marker, extra_info)
        return
    print("ERROR: usage: {} infile outfile".format(args[0]))
    exit(1)


# Run the compiler only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()