import sys, random

# open the sequence data
f = open('p53seq.txt',"r")
seq = f.readline()
seq_len = len(seq)

print "Sequence Read!\n"

# welcome user, ask for command, execute commands by calling relevant function
def welcome():
    print "Press G for GC content\nPress C for reverse complement\nPress T for translation\nPress M for mutation analysis\nPress Q to quit\n"
    var = raw_input("Enter your command and press enter: ")
    if var == "G" or var == "g":
        print "\nCalculating GC Content\n"
        print "GC content: " + str(gc_content(seq)) + "%\n"
        welcome()
    elif var == "C" or var == "c":
        print "\nDetermining Reverse Complement\n"
        print "\nReverse Complement:\n"
        # loop through and print the complement with offsets:
        complement = rev_complement(seq)
        for i in range((len(complement)/50)):
            print str(i*50) + ": " + complement[(i*50):(i*50)+50]
        print str((len(complement)/50)*50) + ": " + complement[(len(complement)/50)*50:len(complement)]
        print "\n"
        welcome()
    elif var == "T" or var == "t":
        offset = int(raw_input("\nPlease enter the offset (1,2,3,-1,-2,-3): "))
        print "\nDetermining Protein Sequence\n"
        if (offset > 0 and offset < 4) or (offset < 0 and offset > -4):
            translation = translate(seq,offset)
            print "Peptide sequence: " + translation
            print "Peptide length: " + str(len(translation)) + "\n"
        else:
            print "\nIncorrect offset\n"
        welcome()
    elif var == "M" or var == "m":
        rounds = int(raw_input("Please enter the number of rounds of mutations: "))
        trials = int(raw_input("Please enter the number of trials desired: "))
        offset = int(raw_input("Please enter the offset (1,2,3,-1,-2,-3): "))
        rate = float(raw_input("Please enter the rate of mutation (eg. 0.01): "))
        mutation_generator(seq, offset, rate, rounds, trials)
        welcome()
    elif var == "Q":
        print "\nGoodbye!\n"

def gc_content(sequence):
    sequence_len = len(sequence)
    gc = 0
    
    # go through the sequence and sum the g or c content
    for i in range(sequence_len):
        if sequence[i:i+1] == "g" or sequence[i:i+1] == "c":
            gc += 1
            
    # calculate gc percentage to 2 significant figures
    gc_percent = round(((float(gc)/sequence_len)*100),2)
    return gc_percent

def rev_complement(sequence):
    sequence_len = len(sequence)
    complement = ""

    # first create a complementary sequence
    for i in range(sequence_len):
        if sequence[i:i+1] == "g":
            complement += "c"
        elif sequence[i:i+1] == "c":
            complement += "g"
        elif sequence[i:i+1] == "a":
            complement += "t"
        elif sequence[i:i+1] == "t":
            complement += "a"

    # then, reverse it
    reverse_complement = complement[::-1]
    return reverse_complement
    welcome()

def translate(sequence, offset):
    standard = { 'ttt': 'F', 'tct': 'S', 'tat': 'Y', 'tgt': 'C',
            'ttc': 'F', 'tcc': 'S', 'tac': 'Y', 'tgc': 'C',
            'tta': 'L', 'tca': 'S', 'taa': '*', 'tga': '*',
            'ttg': 'L', 'tcg': 'S', 'tag': '*', 'tgg': 'W',

            'ctt': 'L', 'cct': 'P', 'cat': 'H', 'cgt': 'R',
            'ctc': 'L', 'ccc': 'P', 'cac': 'H', 'cgc': 'R',
            'cta': 'L', 'cca': 'P', 'caa': 'Q', 'cga': 'R',
            'ctg': 'L', 'ccg': 'P', 'cag': 'Q', 'cgg': 'R',

            'att': 'I', 'act': 'T', 'aat': 'N', 'agt': 'S',
            'atc': 'I', 'acc': 'T', 'aac': 'N', 'agc': 'S',
            'ata': 'I', 'aca': 'T', 'aaa': 'K', 'aga': 'R',
            'atg': 'M', 'acg': 'T', 'aag': 'K', 'agg': 'R',

            'gtt': 'V', 'gct': 'A', 'gat': 'D', 'ggt': 'G',
            'gtc': 'V', 'gcc': 'A', 'gac': 'D', 'ggc': 'G',
            'gta': 'V', 'gca': 'A', 'gaa': 'E', 'gga': 'G',
            'gtg': 'V', 'gcg': 'A', 'gag': 'E', 'ggg': 'G'
            }

    sequence_len = len(sequence)
    peptide = ""
    seq_copy = ""

    # set up the appropriate sequence taking into advantage the offset, putting this as seq_copy
    if(offset == 1):
        seq_copy = sequence
    elif(offset > 1):
        seq_copy = sequence[offset-1:sequence_len]
    else:
        seq_copy = rev_complement(sequence)
        if(offset < -1):
            seq_copy = seq_copy[(-1*offset)-1:sequence_len]

    # make sure there is no incomplete triplet at the end of the sequence
    seq_copy = seq_copy[:len(seq_copy)-(len(seq_copy) % 3)]
                        
    # go through the sequence in groups of three
    for i in range(len(seq_copy)/3):
        triplet = seq_copy[(3*i):(3*i)+3]
        if(standard[triplet] == "*"):
            break
        else:
            peptide += standard[triplet]

    return peptide

# performs mutation analysis on the sequence, given a rate, and a number of trials and rounds
def mutation_generator(sequence, offset, rate, rounds, trials):
    print "\nPerforming Mutation Analysis\n"
    print "Rate of mutation: " + str(rate*100) + " %\nRounds of mutations: " + str(rounds) + "\nNumber of trials: " + str(trials) + "\n"

    #peptide to compare against
    peptide = translate(sequence, offset)

    # variable to count truncations
    truncations = 0

    # variable to count elongations
    elongations = 0

    # variable to count mistranslated proteins
    mistranslations = 0

    # list to store the truncated length
#    truncated_length = []

    # alternative way to calculate average length
    running_average = 0

    for i in range(trials):

        # create a copy of the sequence to mutate
        mutated_sequence = sequence

        # perform a number of rounds of mutations
        for j in range(rounds):
            mutated_sequence = mutate(mutated_sequence, rate)

        translation = translate(mutated_sequence,offset)

        if translation != peptide:
            mistranslations += 1
#            print peptide + " ----> " + translate(mutated_sequence,offset)
        if len(translation) < len(peptide):
            truncations += 1
#            truncated_length.append(len(translation))
            running_average = running_average*(float(truncations-1)/truncations) + float(len(translation))/truncations
        if len(translation) > len(peptide):
            elongations += 1

    print "\n"
    
    #print out stats:
    print "Percentage mistranslated overall, " + str(100*(float(mistranslations) / trials))
    print "Percentage truncated, " + str(100*(float(truncations) / trials))
    print "Percentage elongated, " + str(100*(float(elongations) / trials))
    print "Percent mistranslated -> truncated, " + str(100*(float(truncations)/mistranslations))
    if(running_average > 0):
#        print "Average length of truncated protein, " +  str(float(sum(truncated_length)) / len(truncated_length))
        print "Average length of truncated protein: " + str(running_average)

    print "\n"
    welcome()

def mutate(sequence, rate):
    nucleotides = ["a","c","g","t"]

    mutated_sequence = ""
    
    for i in range(len(sequence)):
        if random.random() < rate:
            #mutate to one of the three other nucleotides:
            mutation_var = random.randint(0, 2)
            possible_nucleotides = [x for x in nucleotides if x != sequence[i]]
            mutated_sequence += possible_nucleotides[mutation_var]
        else:
            mutated_sequence += sequence[i]

    return mutated_sequence

welcome()