import sys, random # open the sequence data f = open('p53seq.txt',"r") seq = f.readline() seq_len = len(seq) print "Sequence Read!\n" # welcome user, ask for command, execute commands by calling relevant function def welcome(): print "Press G for GC content\nPress C for reverse complement\nPress T for translation\nPress M for mutation analysis\nPress Q to quit\n" var = raw_input("Enter your command and press enter: ") if var == "G" or var == "g": print "\nCalculating GC Content\n" print "GC content: " + str(gc_content(seq)) + "%\n" welcome() elif var == "C" or var == "c": print "\nDetermining Reverse Complement\n" print "\nReverse Complement:\n" # loop through and print the complement with offsets: complement = rev_complement(seq) for i in range((len(complement)/50)): print str(i*50) + ": " + complement[(i*50):(i*50)+50] print str((len(complement)/50)*50) + ": " + complement[(len(complement)/50)*50:len(complement)] print "\n" welcome() elif var == "T" or var == "t": offset = int(raw_input("\nPlease enter the offset (1,2,3,-1,-2,-3): ")) print "\nDetermining Protein Sequence\n" if (offset > 0 and offset < 4) or (offset < 0 and offset > -4): translation = translate(seq,offset) print "Peptide sequence: " + translation print "Peptide length: " + str(len(translation)) + "\n" else: print "\nIncorrect offset\n" welcome() elif var == "M" or var == "m": rounds = int(raw_input("Please enter the number of rounds of mutations: ")) trials = int(raw_input("Please enter the number of trials desired: ")) offset = int(raw_input("Please enter the offset (1,2,3,-1,-2,-3): ")) rate = float(raw_input("Please enter the rate of mutation (eg. 0.01): ")) mutation_generator(seq, offset, rate, rounds, trials) welcome() elif var == "Q": print "\nGoodbye!\n" def gc_content(sequence): sequence_len = len(sequence) gc = 0 # go through the sequence and sum the g or c content for i in range(sequence_len): if sequence[i:i+1] == "g" or sequence[i:i+1] == "c": gc += 1 # calculate gc percentage to 2 significant figures gc_percent = round(((float(gc)/sequence_len)*100),2) return gc_percent def rev_complement(sequence): sequence_len = len(sequence) complement = "" # first create a complementary sequence for i in range(sequence_len): if sequence[i:i+1] == "g": complement += "c" elif sequence[i:i+1] == "c": complement += "g" elif sequence[i:i+1] == "a": complement += "t" elif sequence[i:i+1] == "t": complement += "a" # then, reverse it reverse_complement = complement[::-1] return reverse_complement welcome() def translate(sequence, offset): standard = { 'ttt': 'F', 'tct': 'S', 'tat': 'Y', 'tgt': 'C', 'ttc': 'F', 'tcc': 'S', 'tac': 'Y', 'tgc': 'C', 'tta': 'L', 'tca': 'S', 'taa': '*', 'tga': '*', 'ttg': 'L', 'tcg': 'S', 'tag': '*', 'tgg': 'W', 'ctt': 'L', 'cct': 'P', 'cat': 'H', 'cgt': 'R', 'ctc': 'L', 'ccc': 'P', 'cac': 'H', 'cgc': 'R', 'cta': 'L', 'cca': 'P', 'caa': 'Q', 'cga': 'R', 'ctg': 'L', 'ccg': 'P', 'cag': 'Q', 'cgg': 'R', 'att': 'I', 'act': 'T', 'aat': 'N', 'agt': 'S', 'atc': 'I', 'acc': 'T', 'aac': 'N', 'agc': 'S', 'ata': 'I', 'aca': 'T', 'aaa': 'K', 'aga': 'R', 'atg': 'M', 'acg': 'T', 'aag': 'K', 'agg': 'R', 'gtt': 'V', 'gct': 'A', 'gat': 'D', 'ggt': 'G', 'gtc': 'V', 'gcc': 'A', 'gac': 'D', 'ggc': 'G', 'gta': 'V', 'gca': 'A', 'gaa': 'E', 'gga': 'G', 'gtg': 'V', 'gcg': 'A', 'gag': 'E', 'ggg': 'G' } sequence_len = len(sequence) peptide = "" seq_copy = "" # set up the appropriate sequence taking into advantage the offset, putting this as seq_copy if(offset == 1): seq_copy = sequence elif(offset > 1): seq_copy = sequence[offset-1:sequence_len] else: seq_copy = rev_complement(sequence) if(offset < -1): seq_copy = seq_copy[(-1*offset)-1:sequence_len] # make sure there is no incomplete triplet at the end of the sequence seq_copy = seq_copy[:len(seq_copy)-(len(seq_copy) % 3)] # go through the sequence in groups of three for i in range(len(seq_copy)/3): triplet = seq_copy[(3*i):(3*i)+3] if(standard[triplet] == "*"): break else: peptide += standard[triplet] return peptide # performs mutation analysis on the sequence, given a rate, and a number of trials and rounds def mutation_generator(sequence, offset, rate, rounds, trials): print "\nPerforming Mutation Analysis\n" print "Rate of mutation: " + str(rate*100) + " %\nRounds of mutations: " + str(rounds) + "\nNumber of trials: " + str(trials) + "\n" #peptide to compare against peptide = translate(sequence, offset) # variable to count truncations truncations = 0 # variable to count elongations elongations = 0 # variable to count mistranslated proteins mistranslations = 0 # list to store the truncated length # truncated_length = [] # alternative way to calculate average length running_average = 0 for i in range(trials): # create a copy of the sequence to mutate mutated_sequence = sequence # perform a number of rounds of mutations for j in range(rounds): mutated_sequence = mutate(mutated_sequence, rate) translation = translate(mutated_sequence,offset) if translation != peptide: mistranslations += 1 # print peptide + " ----> " + translate(mutated_sequence,offset) if len(translation) < len(peptide): truncations += 1 # truncated_length.append(len(translation)) running_average = running_average*(float(truncations-1)/truncations) + float(len(translation))/truncations if len(translation) > len(peptide): elongations += 1 print "\n" #print out stats: print "Percentage mistranslated overall, " + str(100*(float(mistranslations) / trials)) print "Percentage truncated, " + str(100*(float(truncations) / trials)) print "Percentage elongated, " + str(100*(float(elongations) / trials)) print "Percent mistranslated -> truncated, " + str(100*(float(truncations)/mistranslations)) if(running_average > 0): # print "Average length of truncated protein, " + str(float(sum(truncated_length)) / len(truncated_length)) print "Average length of truncated protein: " + str(running_average) print "\n" welcome() def mutate(sequence, rate): nucleotides = ["a","c","g","t"] mutated_sequence = "" for i in range(len(sequence)): if random.random() < rate: #mutate to one of the three other nucleotides: mutation_var = random.randint(0, 2) possible_nucleotides = [x for x in nucleotides if x != sequence[i]] mutated_sequence += possible_nucleotides[mutation_var] else: mutated_sequence += sequence[i] return mutated_sequence welcome()