# MONOD/interpreterv1 -- interpreter source as published on the Esolang wiki.
 #       __  __  ___  _   _  ___  ____  
 #      |  \/  |/ _ \| \ | |/ _ \|  _ \ 
 #      | |\/| | | | |  \| | | | | | | |
 #      | |  | | |_| | |\  | |_| | |_| |
 #      |_|  |_|\___/|_| \_|\___/|____/ 
 #      robert harry nicodemus williams
 #
 #      Mayday 2009 -- 
 #
 #   GOALS:
 #    (Trivial)   [ ] 2 + 2 = 4
 #    (Easy)      [ ] Hello, World!
 # (Pretty easy?) [ ] Fibonacci
 #    (hard?)     [ ] Brainfuck


import random, copy, string, sys

# Number of bases that make up one codon. The tokenizer, the parser and the
# gene/text helpers below all read this constant.
CODONLENGTH = 3

class Code:
  def __init__(self):
    global basealphabet
    basealphabet = ("A","T","G","C")
    global operatorlist
    operatorlist = ("NULL","FOOO","LYSE","META","CATA","PHOS","DEPH","COPH","BIND","BEND","BACK","TRAN","BEGN","STOP","SENS","JESU")

    # reminders for what these are tentatively supposed to do:
    #
    # NULL: 0
    # FOOO: a variant of NULL
    #
    # LYSE: Destroy the protein /(it is bound to)
    #
    # META + number: Increment chemical_{number} by phosphorylation status
    # CATA + number: Decrement blah blah
    #
    # PHOS: increment phosphorylation of protein /(it is bound to)
    # DEPH: decrement blah blah
    # COPH: COPH n -> phosphorylate if register n is nonzero
    #
    # BIND: mark start of binding sequence
    # BEND: mark end of binding sequence
    # BACK: unbind proteins and attempt to bind to first protein with binding sequence following.
    #
    # TRAN: this one starts transcription of proteins - protein under construction is automatically BACK-bound to creating protein and should prob. be BACK-unbound after 
    # BEGN: a new protein begins here: make operons (not yet) with BIND *brbrbrbrbr BEND BEGN PROT1 PROT1 BEGN PROT2 PROT2 STOP
    # STOP: transcription ceases entirely
    #
    # SENS: "SENS n" : register[n] -> phosphorylation
    # JESU: "JESU n" : n -> register[phosphorylation]
    #--------------------------------------------------------------------
    #proposition 
    #TAKE: TAKE n would halt computation and ask for a value to put into memory address n, then continue. or maybe phos status
    #KINA: ph(-1) self, ph(+1) bound, if unbound, ph(0)
    #
    #
    # Kadmon protein: transcribes *tatgag protein into existence, releases protein (BACK) then kills itself
    # TRAN BIND *tatgag BEND BACK LYSE
    #
    # genome then is like:
    # NULL NULL NULL NULL NULL BIND *tatgag BEND BEGN FOOO FOOO FOOO STOP NULL NULL NULL

    #this is painfully pre v1.0,
    #           ("NULL","FOOO","LYSE","META","CATA","PHOS","DEPH","COPH","BIND","BEND","BACK","TRAN","BEGN","STOP","SENS","JESU")
   #canonical = ["AAA" ,"TTT" ,"CAG" ,"TAC" ,"CAT" ,"CCC" ,"GGG" ,"GAC", "TAA" ,"ATA" ,"AAT" ,"CGG" ,"GCG" ,"GGC", "ACG", "ACT" ]
    
    #for turning ???? back into DNA
    global ballscodons
    ballscodons = ("ACA","ACC")
    
    global codebook
    #pre v1.0 again
    #codebook = dict(zip(canonical,operatorlist))
    
    #operatorlist = ("NULL","FOOO","LYSE","META","CATA","PHOS","DEPH","COPH","BIND","BEND","BACK","TRAN","BEGN","STOP","SENS","JESU")
    
    codebook = {}

    codebook["AAA"] = "NULL";codebook["AAC"] = "NULL";codebook["AAG"] = "NULL"
    
    codebook["TTA"] = "FOOO";codebook["TTT"] = "FOOO";codebook["TTC"] = "FOOO";codebook["TTG"] = "FOOO"

    codebook["CAC"] = "LYSE";codebook["CAG"] = "LYSE"

    codebook["TAG"] = "META";codebook["TAC"] = "META"

    codebook["CAT"] = "CATA";codebook["CAA"] = "CATA"

    codebook["CCA"] = "PHOS";codebook["CCT"] = "PHOS";codebook["CCC"] = "PHOS";codebook["CCG"] = "PHOS"

    codebook["GGG"] = "DEPH"

    codebook["GAA"] = "COPH";codebook["GAT"] = "COPH";codebook["GAC"] = "COPH";codebook["GAG"] = "COPH"

    codebook["TAA"] = "BIND";codebook["TAT"] = "BIND" 
                         
    codebook["ATT"] = "BEND";codebook["ATC"] = "BEND";codebook["ATA"] = "BEND"


    codebook["AAT"] = "BACK"

    codebook["CGA"] = "TRAN";codebook["CGT"] = "TRAN";codebook["CGC"] = "TRAN";codebook["CGG"] = "TRAN"

    codebook["GCA"] = "BEGN";codebook["GCT"] = "BEGN";codebook["GCG"] = "BEGN";codebook["GCC"] = "BEGN"

    codebook["GGC"] = "STOP";codebook["GGA"] = "STOP";codebook["GGT"] = "STOP"

    codebook["ACG"] = "SENS"

    codebook["ACT"] = "JESU"

    # for errors
    default = "default"
    codebook["default"] = "????"

  def codon_to_integer(self,codon):
    """Converts a given codon into an unsigned integer.
       Each member of the alphabet is given a number, which gives each
       codon an n-ary value, which can be trivially converted to decimal"""
    #for 0-length inputs
    if len(codon) == 0:
      return 0
    
    numbase = len(basealphabet)
    indexdict = dict(zip(basealphabet,range(numbase)))
    
    #print codon
    numberlist = []
    integer = 0
    for letter in codon:
      numberlist.append(str(indexdict[letter]))
    #print numberlist
    integer = int("".join(numberlist),numbase)
    return integer
    
  def integer_to_codon(self,integer):
   """Takes an integer and converts it to a codon of approved length.
      If given an integer greater than is possible to express in the 
      base used, this function returns 0. If given an integer too low
      it will pad it with whatever is mapped to 0. """
   integer = int(integer)
   numbase = len(basealphabet)
   
   maxint = 0
   for i in range(numbase):
     maxint = maxint + (numbase**(numbase-i))
   if integer > maxint:
     return 0
   indexdict = dict(zip(range(numbase),basealphabet))
   
   wantednum = []
   for i in range(numbase-1):
     wantednum.append(indexdict[integer%numbase])
     integer = integer/numbase
   wantednum.reverse()
   wantedcodon = "".join(wantednum)
   return wantedcodon
   
  def codon_to_operator(self,codon):
    """Takes a codon, returns its operator"""
    operator = ""
    operator = codebook.get(codon)
    if operator == None:
      operator = "????"
    return operator

  def operator_to_codon(self,operator):
    """Takes an operator, returns the codon"""
    codon = ""
    for k in codebook:
      if codebook[k] == operator:
        return k
    raise ValueError

  def tokenize_dna(self,sequence):
    """Takes a list of strings in the alphabet and returns a tokenised version of it, i.e:
       CODONLENGTH length codons, with noncoding sequences put into large tokens, also dealing
       with fragments and stop codons"""
    pointer = 0
    tokens = []
    oncodons = ("ATA","ATT","ATC")  #(self.operator_to_codon("BEND"))
    offcodons = ("TAA","TAT")  #(self.operator_to_codon("BIND"))
    
    while pointer < len(sequence):
      ## if there's more than CODONLENGTH bases left in the sequence...
      if len(sequence) - pointer >= CODONLENGTH:

        subpointer = 0
        tempcodon = []
        while len(tempcodon) < CODONLENGTH:
          tempcodon.extend(sequence[pointer+subpointer])
          subpointer = subpointer + 1

        newcodon = "".join(tempcodon)
        
        # loop for binding sequences
        if newcodon in offcodons:
          temp = []
          tokens.append(newcodon)
          # until we reach a debinding sequence
          while newcodon not in oncodons:

            subpointer = 0
            tempcodon = []

            while len(tempcodon) < CODONLENGTH:
            # has to do with BIND with no closing BEND i think
              tempcodon.extend(sequence[pointer+subpointer])
              subpointer = subpointer + 1 
              
            newcodon = "".join(tempcodon)

            temp.append(newcodon)
            pointer = pointer + CODONLENGTH
          # when we do, we can finally dump all the codons we kept into a token, removing the signal
          # codons, of course!
          temp = temp[1:-1]
          temp = "".join(temp)
          # a hunch - okay this works, I don't know why. The tokeniser ignores the codon directly after BEND
          # unless you move this back a codon - presumably it goes one too far at some final step
          pointer = pointer - CODONLENGTH
          tokens.append(temp)
      
      # on the other hand, if there's less than three left, there's a fragment to deal with later...
      if len(sequence) - pointer < CODONLENGTH:
       
       fraglength = (len(sequence) - pointer)
       subpointer = 0
       tempcodon = []
       while len(tempcodon) <= fraglength:
         tempcodon.extend(sequence[pointer+subpointer])
         subpointer = subpointer + 1
       
       newcodon = "".join(tempcodon)
       
       tokens.append(newcodon)
       return tokens
       
      tokens.append(newcodon)
      pointer = pointer + CODONLENGTH
 
    return tokens
    
  def parse_tokens(self,tokenstring):
    """Finally a little semantics. Goes through tokens and assigns operators et al. to them.
       Numbers are put where numbers should be, fragments are Noneified, binding sites marked.
       Meaningless codons get "????" (from codon_to_operator)
       """
    parsedstring = copy.deepcopy(tokenstring)
    numberfollows = ("TAG","TAC","CAT","CAA","ACG","GAA","GAT","GAC","GAG","ACT")
    offcodons = ("TAA","TAT")
    
    pointer = 0
    while pointer < len(parsedstring):
      #ignore fragments
      if len(parsedstring[pointer]) < CODONLENGTH:
        parsedstring[pointer] = None
      #binding sites
      elif parsedstring[pointer] in offcodons:
        parsedstring[pointer] = self.codon_to_operator(parsedstring[pointer])
        pointer = pointer + 1
        parsedstring[pointer] = "*" + parsedstring[pointer].lower()     
      #numerical codes after specified codons
      elif parsedstring[pointer] in numberfollows:
        parsedstring[pointer] = self.codon_to_operator(parsedstring[pointer])
        pointer = pointer + 1
        parsedstring[pointer] = self.codon_to_integer(parsedstring[pointer])
      #normal codons
      else:
        parsedstring[pointer] = self.codon_to_operator(parsedstring[pointer])
      pointer = pointer + 1
    return parsedstring
  
  def text_to_gene(self,text):
    """converts a string into a gene representation. Useful for such as
     meaningful binding sequences"""
    message = ""
    for letter in text:
      message = message + self.integer_to_codon(ord(letter))      
    return message

  def gene_to_text(self,gene):
    """Reverses gene_to_text. Issues with non-letter characters."""
    plaintext = ""
    cdns = []
    pointer = 0
    while pointer < len(gene):
      cdn = ""
      subpointer = 0
      while subpointer < CODONLENGTH:
        cdn = cdn + gene[pointer+subpointer]
        subpointer = subpointer + 1 
      pointer = pointer + CODONLENGTH
      cdns.append(cdn)
    cdns = [chr(self.codon_to_integer(cd)+64) for cd in cdns]
    plaintext = "".join(cdns)
    return plaintext
            
class Genome(Code):
  """A DNA sequence together with its tokenized and parsed forms.

  Attributes:
    genome       -- list of single-base strings
    tokenized    -- result of tokenize_dna(genome)
    parsed       -- result of parse_tokens(tokenized)
    bindingsites -- (site, index into parsed) pairs, see id_bindingsites

  Code.__init__ is not invoked here, so the code tables must already have
  been published (by creating a Code() somewhere) before any parsing or
  genome-building method is used.
  """
  def __init__(self,okay=None):
    """Start from an optional sequence (string or list of bases).

    The sequence is copied into a fresh list, so neither a caller's list nor
    a shared default is ever mutated by add_gene, and a genome given as a
    string can still be extended. A non-empty sequence is tokenized, parsed
    and scanned for binding sites straight away.
    """
    if okay is None:
      okay = []
    self.genome = list(okay)
    self.tokenized = []
    self.parsed = []
    self.bindingsites = []
    if self.genome:
      self.tokenize_genome()
      self.parse_genome()
      self.id_bindingsites()

  def id_bindingsites(self):
    """Record every "*site" entry of self.parsed with its index; returns 1."""
    self.bindingsites = [(entry, position)
                         for position, entry in enumerate(self.parsed)
                         if str(entry).startswith("*")]
    return 1

  #=============================
  #GENOME CONSTRUCTING FUNCTIONS
  #=============================

  def add_gene(self,genestring):
    """Adds an arbitrary sequence (upper-cased) to the end of the genome"""
    self.genome.extend(genestring.upper())
    return 1

  #probably obsolete because of add_operators
  def add_operator(self,operator):
    """Adds a codon for the given operator to the end of the genome.

    operator_to_codon raises ValueError for an unknown operator."""
    self.add_gene(self.operator_to_codon(operator))
    return 1

  def add_operators(self,codons):
    """Takes a tuple/list of operators and adds them in order"""
    for c in codons:
      self.add_operator(c)
    return 1

  #combine these two into one function eventually probably
  def add_random_genes(self,length):
    """Takes number of codons requested and adds them directly to genome.
       Generated randomly over whole alphabet."""
    randgen = [random.choice(basealphabet) for i in xrange(length*CODONLENGTH)]
    self.add_gene("".join(randgen))
    return 1

  def add_random_operators(self,length):
    """Takes number of codons requested and adds them directly to genome.
       Taken from operator list"""
    for i in xrange(length):
      # add to THIS genome, not to whatever global happens to be called "genome"
      self.add_operator(random.choice(operatorlist))
    return 1

  #=================
  #PARSING FUNCTIONS
  #=================

  def tokenize_genome(self):
    """Refresh self.tokenized from self.genome; returns 1."""
    self.tokenized = self.tokenize_dna(self.genome)
    return 1

  def parse_genome(self):
    """Refresh self.parsed from self.tokenized; returns 1."""
    self.parsed = self.parse_tokens(self.tokenized)
    return 1
 
 
class Protein():
  """A protein: a fixed list of parsed codons plus its mutable run-time state.

  Attributes:
    name         -- display name
    id           -- random integer in 0..99
    structure    -- the codon list given at construction
    bindingsites -- the "*site" entries found in structure
    boundto      -- the protein currently bound, or [] when unbound
    phos         -- phosphorylation level
  """
  def __init__(self,struct,phos=0,nom="",index=0):
    """Create a protein.

    struct -- list of parsed codons (operator names, integers, "*site" strings)
    phos   -- initial phosphorylation level
    nom    -- name to use; when empty a random one is generated
    index  -- offset from "A" of the letter appended to a generated name
    """
    #static elements
    randnum = random.randrange(100)
    if nom:
      self.name = nom
    else:
      # Capital + two lower-case letters + index letter. The ascii_*
      # constants are not locale-dependent and exist on Python 2 and 3.
      balls = (random.choice(string.ascii_uppercase)
               + random.choice(string.ascii_lowercase)
               + random.choice(string.ascii_lowercase))
      self.name = balls + chr(65 + index)

    self.id = randnum
    self.structure = struct
    # startswith() copes with empty strings, which str(x)[0] does not
    self.bindingsites = [operator for operator in self.structure
                         if str(operator).startswith("*")]

    #dynamic elements
    self.boundto = []
    self.phos = phos

  def phosphorylate(self, value=1):
    """alter phosphorylation status of protein by value; returns the new status"""
    self.phos = self.phos + value
    return self.phos
    
class Cell(Protein,Genome):
  def __init__(self,primer=None,memorysize=64,looplimit=4):
    """Build a cell and run it until no protein is left.

    primer     -- codon list of the first ("Kadmon") protein; ["NULL"] if omitted
    memorysize -- number of chemical registers, all starting at 0
    looplimit  -- how many times in a row the protein at the head of the
                  protein array may be re-executed before it is lysed
                  externally; a negative value switches the limit off

    The Kadmon protein is executed once, then the protein at the head of the
    protein array is executed over and over until the array is empty, and
    finally the registers are printed.

    Relies on the module-level names multiprint and suppresstext (and, via
    execute_protein, genome), which this file only defines when it is run as
    a script.
    """
    if primer is None:
      primer = ["NULL"]

    #initialise the chemical array
    self.chemarray = [0] * memorysize

    #initialise the protein array
    self.protarray = []

    #add the Kadmon protein
    self.protkadmon = Protein(primer,0,"KadA")
    self.protarray.append(self.protkadmon)

    print("Priming with %s (Kadmon)! %s" % (self.protkadmon.name,primer))
    if not primer or primer[-1] != "LYSE":
      print("    CAUTION! %s (Kadmon) is not LYSE terminated!" % self.protkadmon.name)
    print("Starting to execute %s (Kadmon)" % self.protkadmon.name)
    self.execute_protein(self.protkadmon)
    print("Finished executing %s (Kadmon)" % self.protkadmon.name)

    #if a protein without a LYSE is added, this stops it after looplimit
    #repeats. Repress loop-limiting by setting looplimit = -1 upon instantiation

    lastprotein = self.protkadmon
    loopcaution = 0

    while self.protarray:
      currentprotein = self.protarray[0]
      #check to see if the same protein is at the top
      if currentprotein is lastprotein:
        #if so, increment the loop counter and warn
        loopcaution = loopcaution + 1
        print("%s executes again (%s times)" % (currentprotein.name, loopcaution))
      else:
        #a different protein reached the top: it gets a fresh allowance
        loopcaution = 0
      print("Starting to execute %s." % currentprotein.name)
      self.execute_protein(currentprotein)
      print("Finished executing %s." % currentprotein.name)
      lastprotein = currentprotein
      #halt unruly proteins -- but only the one that just ran, and only if
      #it has not already removed itself
      if looplimit >= 0 and loopcaution >= looplimit and currentprotein in self.protarray:
        print("Loop limit reached. Lysing %s externally." % currentprotein.name)
        self.protarray.remove(currentprotein)

    multiprint("Tableau des proteines est vide. Fin.","Protein array empty. Stopping.")

    #display final chem memory state
    self._dump_chemarray()

  def _dump_chemarray(self):
    """Print the final register contents.

    With exactly 64 registers they are shown as an 8x8 grid whose column
    width follows the largest value; unless suppresstext is set, each row is
    followed by the same registers as characters (value + 64). Any other
    register count is printed as a plain list.
    """
    print("")
    print("Final chemical memory state:")
    if len(self.chemarray) != 64:
      print(self.chemarray)
      return

    highest = max(self.chemarray)
    if highest < 10:
      width = 1
    elif highest < 100:
      width = 2
    else:
      width = 3

    for i in range(8):
      row = self.chemarray[i*8:i*8+8]
      numbers = " ".join(["%*s" % (width, value) for value in row])
      if suppresstext:
        print(numbers)
        continue
      letters = []
      for value in row:
        charcode = value + 64
        # chr() rejects codes outside its range; show a placeholder rather
        # than crash after the whole computation has finished
        if 0 <= charcode < 256:
          letters.append(chr(charcode))
        else:
          letters.append("?")
      print(numbers + "        " + " ".join(letters))
  def change_chemarray(self,index,quantity):
    """Add quantity (which may be negative) to chemical register number index."""
    self.chemarray[index] += quantity
   
  def display_protarray(self):
    """Print the names of all proteins in the protein array on one line, in order."""
    names = "".join([" " + pro.name for pro in self.protarray])
    print("Protein Array: " + names)
    
  def execute_protein(self,prot):
    """Walks through a protein and executes it, one codon at a time.

    Operators act on the protein bound to prot when there is one, otherwise
    on prot itself:
      LYSE   remove the bound protein from the protein array; with nothing
             bound, remove prot itself and stop executing it
      TRAN   find the next "*site" in prot, build a protein from every
             genome region carrying that site (codons between the following
             BEGN and STOP) and bind the newest one to prot
      BACK   drop the current binding; if "BIND *site" follows, bind to the
             first other protein that carries that site
      PHOS / DEPH   phosphorylation +1 / -1
      COPH n        phosphorylation +1 if register n is nonzero
      META n / CATA n   register n += / -= prot's phosphorylation
      SENS n        phosphorylation = register n
      JESU n        register[abs(phosphorylation)] = n
    Everything in passcodons is reported and skipped; anything else is
    skipped silently.

    Register arguments are converted with int(), so primers read from a
    file (where numbers are text) behave like parsed genomes.

    Reads the module-level names genome and multiprint.
    """
    passcodons = ["NULL","FOOO","????","STOP","BIND","BEND","BEGN"]
    takesregister = ("COPH","META","CATA","SENS","JESU")
    structure = prot.structure
    size = len(structure)
    pointer = 0
    while pointer < size:

      #PASS THROUGH PASSING CODONS IN passcodons
      if structure[pointer] in passcodons:
        multiprint("    %s fait rien (%s)" % (prot.name, structure[pointer]),"    %s does nothing (%s)" % (prot.name, structure[pointer]))

      #AUTOLYSIS!
      if structure[pointer] == "LYSE":
        #lyse bound protein
        if prot.boundto:
          self.protarray.remove(prot.boundto)
          print("    %s lyses bound %s." % (prot.name,prot.boundto.name))
          prot.boundto = []
        #or else itself
        else:
          self.protarray.remove(prot)
          multiprint("    %s se lyse." % prot.name,"    %s lyses itself." % prot.name)
          break

      #MAKING NEW PROTEINS
      if structure[pointer] == "TRAN":

        #look for next binding site on protein
        pointer = pointer + 1
        while pointer < size and not str(structure[pointer]).startswith("*"):
          pointer = pointer + 1
        if pointer >= size:
          print("    %s has no binding site after TRAN; nothing transcribed." % prot.name)
          break
        nextbindingsite = structure[pointer]

        #find matching binding site(s) on genome:
        relevantbindindices = [entry[1] for entry in genome.bindingsites if entry[0] == nextbindingsite]

        multiprint("    %s trouve %s \"%s\" dans genome." % (prot.name, len(relevantbindindices), nextbindingsite),"    %s finds %s \"%s\" in genome." % (prot.name, len(relevantbindindices), nextbindingsite))

        parsed = genome.parsed
        limit = len(parsed)
        for entry in relevantbindindices:
          #skip ahead to the BEGN that follows the binding site
          transpointer = entry
          while transpointer < limit and parsed[transpointer] != "BEGN":
            transpointer = transpointer + 1
          #step past BEGN, then copy codons up to (not including) STOP;
          #running off the end of the genome also ends the protein
          transpointer = transpointer + 1
          newstruct = []
          while transpointer < limit and parsed[transpointer] != "STOP":
            newstruct.append(parsed[transpointer])
            transpointer = transpointer + 1

          if not newstruct:
            #fall back on this
            newstruct = ["NULL"]
          #make the new protein
          self.protarray.append(Protein(newstruct,0))
          #bind the new protein to the TRANsing protein
          prot.boundto = self.protarray[-1]

          multiprint("    Nouvelle Proteine %s! %s (offset du genome %s)" % (prot.boundto.name,prot.boundto.structure,entry),"    New Protein %s! %s (genome offset %s)" % (prot.boundto.name,prot.boundto.structure,entry))
          if prot.boundto.structure[-1] != "LYSE":
            multiprint("    ATTENTION! %s n'est pas n'est pas resilie par LYSE!" % prot.boundto.name,"    CAUTION! %s is not LYSE terminated!" % prot.boundto.name)

      #BACK STUFF NEEDS DOING PROPERLY
      if structure[pointer] == "BACK":
        #unbind bound protein
        if prot.boundto:
          temp = prot.boundto.name
          prot.boundto = []
          multiprint("    %s detache %s!" % (prot.name,temp),"    %s unbinds %s!" % (prot.name,temp))
        else:
          print("    %s unbound nothing (BACK)" % (prot.name))
        #check to see if next codon is binding sequence (and that the
        #sequence itself is actually there)
        if pointer + 2 < size and structure[pointer+1] == "BIND":
          pointer = pointer + 2
          backbind = structure[pointer]
          print("    %s activates binding site %s" % (prot.name,backbind))

          #every OTHER protein containing the binding site ID'd in backbind
          betterlist = [bro for bro in self.protarray
                        if bro is not prot and backbind in bro.bindingsites]
          #bind to the first member of this array
          if betterlist:
            prot.boundto = betterlist[0]
            print("    %s binds %s through %s" % (prot.name,prot.boundto.name,backbind))
          else:
            print("    Binding site %s on %s finds no target." % (backbind, prot.name))
          #skip into BEND, the incrementer at the end will pick it up
          pointer = pointer + 1
          if pointer >= size:
            break

      #PHOSPHORYLATE
      if structure[pointer] == "PHOS":
        if prot.boundto:
          prot.boundto.phosphorylate()
          print("    %s increases phosphorylation of %s to %s" % (prot.name,prot.boundto.name,prot.boundto.phos))
        else:
          prot.phosphorylate()
          multiprint("    %s s'augment sa phosphorylation de %s" % (prot.name,prot.phos),"    %s increased phosphorylation to %s" % (prot.name,prot.phos))

      #DEPHOSPHORYLATE
      if structure[pointer] == "DEPH":
        if prot.boundto:
          prot.boundto.phosphorylate(-1)
          print("    %s decreases phosphorylation of %s to %s" % (prot.name,prot.boundto.name,prot.boundto.phos))
        else:
          prot.phosphorylate(-1)
          multiprint("    %s se diminue sa phosphorylation de %s" % (prot.name,prot.phos),"    %s decreased phosphorylation to %s" % (prot.name,prot.phos))

      #an operator that needs a register argument cannot be the last codon
      if structure[pointer] in takesregister and pointer + 1 >= size:
        print("    %s ends in %s without a register argument; stopping." % (prot.name,structure[pointer]))
        break

      #CONDITIONAL PHOSPHORYLATE
      if structure[pointer] == "COPH":
        pointer = pointer + 1
        register = int(structure[pointer])
        if self.chemarray[register] != 0:
          if prot.boundto:
            prot.boundto.phosphorylate()
            print("    %s increased phosphorylation of %s to %s -- Register %s nonzero" % (prot.name,prot.boundto.name,prot.boundto.phos,register))
          else:
            prot.phosphorylate()
            print("    %s increased phosphorylation to %s -- Register %s nonzero" % (prot.name,prot.phos,register))
        else:
          print("    %s did nothing -- Register %s is zero" % (prot.name,register))

      #METABOLISE A CHEMICAL
      if structure[pointer] == "META":
        pointer = pointer + 1
        index = int(structure[pointer])
        self.change_chemarray(index,prot.phos)
        multiprint("    %s augment le registre %s par %s." % (prot.name,index,prot.phos),"    %s increased register %s by %s." % (prot.name,index,prot.phos))

      #CATABOLISE A CHEMICAL
      if structure[pointer] == "CATA":
        pointer = pointer + 1
        index = int(structure[pointer])
        self.change_chemarray(index,-prot.phos)
        multiprint("    %s diminue le registre %s par %s." % (prot.name,index,prot.phos),"    %s decreased register %s by %s." % (prot.name,index,prot.phos))

      #PHOSPHORYLATE SELF ACCORDING TO CHEMICAL CONCENTRATION
      if structure[pointer] == "SENS":
        pointer = pointer + 1
        register = int(structure[pointer])
        if prot.boundto:
          prot.boundto.phos = self.chemarray[register]
          print("    %s's phosphorylation is %s (from register %s by %s)" % (prot.boundto.name,prot.boundto.phos,register,prot.name))
        else:
          prot.phos = self.chemarray[register]
          multiprint("    La phosphorylation de %s est %s (au registre %s)" % (prot.name,prot.phos,register),"    %s's phosphorylation is %s (from register %s)" % (prot.name,prot.phos,register))

      #SET CHEMICAL CONCENTRATION ACCORDING TO PHOSPHORYLATION STATE
      #"JESU n" : n -> register[phosphorylation]
      if structure[pointer] == "JESU":
        pointer = pointer + 1
        value = int(structure[pointer])
        if prot.boundto:
          self.chemarray[abs(prot.boundto.phos)] = value
          print("    %s phosphor-sets register %s by %s to %s" % (prot.name,prot.boundto.phos,value,prot.boundto.name))
        else:
          self.chemarray[abs(prot.phos)] = value
          print("    %s phosphor-sets register %s to %s" % (prot.name,prot.phos,value))

      #AND MOVE FORWARD
      pointer = pointer + 1
                
if __name__ == "__main__":
 
  #utility functions
  
  def prettyintro():
    """Print the MONOD banner, then a greeting in the active language."""
    # raw strings: the backslashes are part of the drawing, not escapes
    banner = (
      r"       __  __  ___  _   _  ___  ____  ",
      r"      |  \/  |/ _ \| \ | |/ _ \|  _ \ ",
      r"      | |\/| | | | |  \| | | | | | | |",
      r"      | |  | | |_| | |\  | |_| | |_| |",
      r"      |_|  |_|\___/|_| \_|\___/|____/ ",
      r"      robert harry nicodemus williams~",
      "",
    )
    for line in banner:
      print(line)
    multiprint("                 Bonjour.","                 Hello.")
    print("")
    
  def debug():
    """Run the built-in demo: build a hard-coded genome and prime a cell.

    Gets run when no mode flag is given. The genome is appended to the
    module-level genome object piece by piece, then tokenized, parsed and
    scanned for binding sites; its DNA is printed and a Cell is started with
    a Kadmon primer that transcribes every *tatgag gene.
    """
    # tuples are operator runs (add_operators), strings are raw DNA (add_gene)
    recipe = [
      ("NULL", "PHOS", "PHOS", "NULL", "BIND"),
      "TATGAG",
      ("BEND", "BEGN", "PHOS", "PHOS", "PHOS", "META", "FOOO", "FOOO","LYSE","STOP","NULL","BIND"),
      "TATGAG",
      ("BEND","BEGN","PHOS","META","PHOS","DEPH","META","FOOO","DEPH","NULL","BACK","BIND"),
      "CATGAT",
      ("BEND","PHOS","PHOS","PHOS","PHOS","DEPH","FOOO"),
      ("BACK","LYSE","STOP"),
      ("NULL","JESU","PHOS","BIND"),
      "TATGAG",
      ("BEND","BEGN","BIND"),
      "CATGAT",
      ("BEND","PHOS","PHOS","PHOS","PHOS","PHOS","PHOS","META"),
      "AGA",
      ("LYSE","STOP"),
      ("NULL","BIND"),
      "TATGAG",
      ("BEND","BEGN","PHOS","PHOS","META"),
      "CTA",
      ("NULL","BIND"),
      "ATTTTTTTT",
      ("BEND","DEPH","LYSE","STOP"),
    ]
    for piece in recipe:
      if isinstance(piece, tuple):
        genome.add_operators(piece)
      else:
        genome.add_gene(piece)

    genome.tokenize_genome()
    genome.parse_genome()
    genome.id_bindingsites()

    print("".join(genome.genome))
    # constructing the Cell is what runs the program
    cell = Cell(["TRAN", "BIND", "*tatgag", "BEND", "BACK", "NULL", "LYSE"])
    
  def readdna(filename):
    """Read a .dna file and return its DNA as one string.

    filename is the name of the file (with .dna). Only the characters G, A,
    C and T (upper case) are kept; every other character in the file is
    dropped. If the file cannot be opened a message is shown and the program
    quits.
    """
    try:
      genomefile = open(filename,"r")
    except IOError:
      multiprint("%s n'existe pas!" % filename, "%s does not exist!" % filename)
      quit()
    rawtext = genomefile.read()
    genomefile.close()

    return "".join([letter for letter in rawtext if letter in "GACT"])
    
  def traduire():
    """Translate a .rna file (operator names) into a .dna file, then exit.

    Takes no arguments; asks for the input and output filenames instead.
    Tokens may be separated by any whitespace. Each token becomes DNA:
      operator name -> a codon for it
      *site         -> the site's letters, upper-cased
      number        -> its codon
      anything else -> one of the ballscodons, picked at random
    The output goes to the name given (".dna" is appended if missing), or to
    the input name with ".dna" when the answer is left empty.
    """
    multiprint("Quel est le nom du fichier (avec ou sans .rna) qui contient les codons?","codon filename (with or without .rna)")
    codonfilename = raw_input("> ")
    if codonfilename[-4:] != ".rna": codonfilename = codonfilename + ".rna"
    try:
      codonfile = open(codonfilename,"r")
    except IOError:
      multiprint("%s n'existe pas!" % codonfilename,"%s does not exist!" % codonfilename)
      quit()
    rawcodons = codonfile.read()
    codonfile.close()
    # split on runs of whitespace so newlines, double spaces and a trailing
    # space never produce empty or glued-together tokens
    codons = rawcodons.split()

    #convert codons back into dna
    pieces = []
    print(codons)
    for codon in codons:
      #normal case
      if codon in operatorlist:
        pieces.append(code.operator_to_codon(codon))
      #binding sequence
      elif codon[0] == "*":
        pieces.append(codon[1:].upper())
      #numerical codon
      elif codon[0] in string.digits:
        foo = code.integer_to_codon(codon)
        # integer_to_codon answers 0 for numbers it rejects; treat those
        # like any other untranslatable token
        if not foo:
          foo = random.choice(ballscodons)
        pieces.append(foo)
      #revert to ballscodon
      else:
        pieces.append(random.choice(ballscodons))
    dnastring = "".join(pieces)

    multiprint("Quelle est le nom (avec ou sans .dna) du fichier de sortie? Laisser vide pour le meme nom.","output filename? (with or without .dna) leave empty for same name")
    boules = raw_input("> ")
    writefilename = codonfilename[:-4]+".dna"
    if boules:
      # honour the given name whether or not it already ends in .dna
      if boules[-4:] != ".dna":
        boules = boules + ".dna"
      writefilename = boules

    writefile = open(writefilename,"w")
    writefile.write(dnastring)
    writefile.close()
    quit()
  
  def numbersplease():
    """Print each number a single codon can hold next to its codon, then exit."""
    howmany = len(basealphabet)**CODONLENGTH
    num = 0
    while num < howmany:
      print("%s %s" % (num,code.integer_to_codon(num)))
      num = num + 1
    quit()
  
  def translatetext():
    """Ask for a line of text, print the DNA string that represents it, then exit."""
    print("Input plaintext.")
    plaintext = raw_input("> ")
    encoded = code.text_to_gene(plaintext)
    print("DNA string for \"%s\":\n%s" % (plaintext,encoded))
    quit()
    
  def multiprint(frstring,enstring):
    """Print one of two translations of the same message.

    frstring is printed when the global language is "french", enstring when
    it is "english". Any other language setting gets the caveman message
    instead.
    """
    if language == "french":
      chosen = frstring
    elif language == "english":
      chosen = enstring
    else:
      #revert to caveman
      chosen = "Ooga booga bekos."
    print(chosen)
  
  def complicatedinput():
    """Interactive mode: ask for a .dna source file and a .rna primer, then run.

    The genome is read and parsed, its parsed form is written next to the
    source as <name>.rna, the primer file is read as whitespace-separated
    tokens, and a Cell is started with it. Exits the program when done.
    """
    # Cell.execute_protein looks the genome up as a module-level global, so
    # the Genome built here has to be published under that name.
    global genome

    multiprint("Quel est le nom du fichier (avec ou sans .dna) qui contient le code source?","sourcecode filename? (with or without .dna")
    filename = raw_input("> ")
    if filename[-4:] != ".dna": filename = filename + ".dna"

    #actually put contents of file into genome
    genome = Genome(readdna(filename))

    #write .rna file
    rnafile = open("%s.rna"%filename[:-4],"w")
    try:
      for codon in genome.parsed:
        rnafile.write("%s " % str(codon))
    finally:
      rnafile.close()

    #and now to compute we need the kadmon primer
    multiprint("Quel est le nom du fichier (avec ou sans .rna) qui contient l'amorce?","primer filename? (with or without .rna")
    primerfilename = raw_input("> ")
    if primerfilename[-4:] != ".rna": primerfilename = primerfilename + ".rna"
    try:
      primerfile = open(primerfilename,"r")
    except IOError:
      multiprint("%s n'existe pas!" % primerfilename,"%s does not exist!" % primerfilename)
      quit()
    rawprimer = primerfile.read()
    primerfile.close()
    # whitespace split: keeps the last operator even when the file does not
    # end in a space, and gives the same list as before when it does
    primer = rawprimer.split()

    # all the input we need has been given and we can finally set the cell in its inexorable motion
    cell = Cell(primer)
    sys.exit(0)
  
  def displayhelp():
    """Print the help text (such as it is) in the active language, then exit."""
    french = "nom de dieu jsais pas cest embrouille ne me demandez pas jne travaille quici"
    english = "o god i dont know its complicated dont ask me i just work here"
    multiprint(french,english)
    quit()
  #======================================================================
  #START MAIN PROPER
  #======================================================================
  
  # Output language for multiprint(): "french" or "english". Anything else
  # makes multiprint fall back to its caveman message.
  language = "french"

  # When true, the final memory dump shows the register numbers only; when
  # false each row is also shown as characters. "-s" switches it on.
  suppresstext = 0
  if "-s" in sys.argv:
    suppresstext = 1

  #initialise the code, which is always useful
  code = Code()

  # "-e": English output
  if "-e" in sys.argv:
    language = "english"

  #display the nice title in french, english or caveman
  prettyintro()

  # "-h": help
  if "-h" in sys.argv:
    displayhelp()

  #.rna => .dna mode
  if "-t" in sys.argv:
    traduire()

  #print numberlist
  if "-n" in sys.argv:
    numbersplease()

  #quick version of -f: "-x" followed by the .dna and .rna files, either order
  if "-x" in sys.argv:
    xat = sys.argv.index("-x")
    files = sys.argv[xat+1:xat+3]
    # files are told apart by the third-from-last character of their names
    rnafiles = [fil for fil in files if fil[-3:-2] == "r"]
    dnafiles = [fil for fil in files if fil[-3:-2] == "d"]
    if not rnafiles or not dnafiles:
      multiprint("Usage: -x fichier.dna fichier.rna","Usage: -x file.dna file.rna")
      sys.exit(1)
    rnafilename = rnafiles[0]
    dnafilename = dnafiles[0]

    genome = Genome(readdna(dnafilename))

    try:
      primerfile = open(rnafilename,"r")
    except IOError:
      multiprint("%s n'existe pas!" % rnafilename,"%s does not exist!" % rnafilename)
      sys.exit(1)
    rawprimer = primerfile.read()
    primerfile.close()
    # whitespace split: keeps the last operator even when the file does not
    # end in a space
    primer = rawprimer.split()

    cell = Cell(primer)

    sys.exit(0)

  #translate text into dna string
  if "-c" in sys.argv:
    translatetext()

  #.dna => .rna => computation!
  if "-f" in sys.argv:
    complicatedinput()

  # debug by default - ie. what i've been using up to now...
  else:
    genome = Genome()
    debug()