# -*- coding: utf-8 -*-
import pyfold as pf
import tarfile as t
import os
import numpy as np
# Data directory path; presumably where the pymut run files live (it is not
# referenced by the code visible in this file).
dpath = '/home/thomas/pymut/pymut_dat/'
import math
import sys
import time as tm
import random
#rep=int(sys.argv[1])
# Initial genotype: a sequence over the alphabet listed in `cods`.
inigt = 'UUGGUUAUAAUUUCGCUGGGUGGCGCCCCAUAUGGCGAACUGCAUCAAUGUGACUGUCGAAGGUCGGGACCGAUAUAGACGUUGGGUACCUCUAACCUG'
# Target shape, written as a string of dots and brackets; folded shapes are
# compared against it with pf.hamming.
target = '..(((((....(((((((.((((....)))).))))))).(((.(((((((..((((....(((....)))...))))))))))))))....)))))..'
# Genotype length; get_f_dat mutates positions 0..glen-1.
glen = len(inigt)
# The four bases a position can mutate to.
cods = list('ACGU')
# Not referenced by the code visible in this file.
beta = 1.0
# Multiplier applied to the Hamming distance in calc_s.
linc = 0.025
def mutlevels(maxfit,minfit,steps):
    '''
    Returns a descending list of `steps` evenly spaced values running from
    maxfit down to minfit.

    The last element is exactly minfit; the first is minfit+(steps-1)*d, i.e.
    maxfit up to floating-point rounding. steps < 1 gives an empty list and
    steps == 1 raises ZeroDivisionError (the spacing is undefined).
    '''
    # float() keeps the spacing fractional when both bounds are integers;
    # Python 2's "/" would otherwise truncate it (often to 0).
    d=float(maxfit-minfit)/(steps-1)
    return [minfit+i*d for i in reversed(range(steps))]
# Fifteen fitness levels from 0.95 down to 0.01, and the negative natural log
# of each level (same order).
expected_fit = mutlevels(.95, .01, 15)
ulist = [-math.log(x) for x in expected_fit]

def calc_s(gtype,target):
    '''
    Folds gtype with pf.pyfold, measures the Hamming distance of the resulting
    shape to target and scales it by linc.

    Returns the scaled distance when it is below 1.0, otherwise 1
    (harmonic_del uses the result as a selection coefficient).
    '''
    distance=pf.hamming(pf.pyfold(gtype),target)
    scaled=linc*distance
    if scaled<1.0:
        return scaled
    return 1
    
def get_ham(gtype,ref_shape=None):
    '''
    Folds gtype with pf.pyfold and returns the Hamming distance (pf.hamming)
    between the folded shape and a reference shape.

    ref_shape defaults to the module-level `target`. The previous body read a
    global named `targ`, which is not defined anywhere in this file, so every
    call raised NameError.
    '''
    if ref_shape is None:
        ref_shape=target
    return pf.hamming(pf.pyfold(gtype),ref_shape)
# Number of single-point mutants of the initial genotype: three alternative
# bases at every position.
totpos = len(inigt) * 3
# Runs at import time, before any pf.pyfold call made further down; presumably
# it loads the folding parameters -- confirm against pyfold.
pf.fold_params()

def get_f_dat(urange):
    '''
    Scans the run files 'pymut_<j>_run_5000' for j = 1..urange and, for every
    genotype whose recorded shape equals the module-level `target`, folds all
    of its single-point mutants and tallies the ones whose shape changes.

    File layout as read here: the first four lines are skipped; the remainder
    is consumed in pairs of lines, a genotype followed by a comma-separated
    record whose field 0 is the shape and field 3 an integer individual count.

    Returns (fdel_dat, fdel_tot, adel_dat), three dicts keyed by j. A key is
    present only if the file could be read and held at least one individual
    with the target shape:
      fdel_dat[j] - tuple of the individual-weighted fractions of mutants at
                    Hamming distance 2, 4 and 6 from the target
      fdel_tot[j] - individual-weighted fraction of mutants at any non-zero
                    distance
      adel_dat[j] - adel/fdel (see the note in the loop), or nan when no
                    mutant was deleterious

    Missing or unreadable files are skipped. Malformed records still raise
    (IndexError / ValueError).
    '''
    fdel_dat={}
    fdel_tot={}
    adel_dat={}
    for j in range(1,urange+1):
        # Only the file access is guarded: an absent run file just means there
        # is no data for this j, while any other failure should surface.
        try:
            with open('pymut_%i_run_5000'%j) as run_file:
                dat=run_file.readlines()[4:]
        except IOError:
            continue
        totindivs=0
        fdel=0.0
        adel=0.0
        # Individual-weighted counts of mutants at Hamming distance 2, 4 and 6.
        fdel_by_dist={2:0.0,4:0.0,6:0.0}
        for i in range(0,len(dat),2):
            gtype=dat[i].strip()
            fields=dat[i+1].strip().split(',')
            shape=fields[0]
            indivs=int(fields[3])
            # The original compared against `targ`, a name that is not defined
            # in this file; the module-level `target` is the shape every other
            # function compares with.
            if shape != target:
                continue
            totindivs+=indivs
            for k in range(glen):
                for cod in cods:
                    # Equality, not identity: "is" on strings only works by
                    # accident of interning.
                    if gtype[k] == cod:
                        continue
                    mutg=gtype[:k]+cod+gtype[k+1:]
                    ham_dist=pf.hamming(pf.pyfold(mutg),target)
                    if not ham_dist:
                        continue
                    fdel+=indivs
                    # NOTE(review): 2.5 looks like linc expressed in percent
                    # and 99 like the sequence length -- confirm.
                    s=2.5*ham_dist*100/99.0
                    # NOTE(review): fdel is weighted by indivs but adel is
                    # not, so adel/fdel is a plain mean only when every
                    # genotype has a single individual -- confirm intent.
                    adel+=s if s<100 else 100
                    if ham_dist in fdel_by_dist:
                        fdel_by_dist[ham_dist]+=indivs
        # Same space-separated output as the Python 2 print statement, written
        # in a form that also parses under Python 3.
        print('%s %s %s %s %s'%(j,totindivs,fdel_by_dist[2],fdel_by_dist[4],fdel_by_dist[6]))
        if totindivs:
            normf=3*glen*totindivs
            fdel_dat[j]=(fdel_by_dist[2]/normf,fdel_by_dist[4]/normf,fdel_by_dist[6]/normf)
            # Without deleterious mutants the mean effect is undefined; nan
            # avoids the ZeroDivisionError and keeps the three dicts in step.
            adel_dat[j]=adel/fdel if fdel else float('nan')
            fdel_tot[j]=fdel/normf
    return fdel_dat,fdel_tot,adel_dat
    
def harmonic_del(gtype,target):
    '''
    Calculates the harmonic mean effect of deleterious mutations for a given genotype.

    Every single-point mutant of gtype is folded with pf.pyfold. For each one
    the target and the mutant shape (with its Hamming distance to the target)
    are printed, and the mutant shape is written as one line of
    'mut_shape_only.txt', which is overwritten on every call. Mutants whose
    shape differs from target contribute 1/calc_s(mutant, target).

    Returns (harmonic_mean, inv_sel), where inv_sel is the list of those
    inverse coefficients and harmonic_mean is len(inv_sel)/sum(inv_sel).
    When no mutant is deleterious (including an empty gtype) the mean is
    undefined and nan is returned instead of raising ZeroDivisionError.
    '''
    bases=['A','C','G','U']
    inv_sel=[]
    # "with" closes the output file even if folding raises part-way through.
    with open('mut_shape_only.txt','w') as f:
        for i,cur_base in enumerate(gtype):
            for base in bases:
                if base==cur_base:
                    continue
                mut_gtype=gtype[:i]+base+gtype[i+1:]
                mut_phen=pf.pyfold(mut_gtype)
                # Same output as the Python 2 print statements, written in a
                # form that also parses under Python 3.
                print(target)
                print('%s %s'%(mut_phen,pf.hamming(mut_phen,target)))
                f.write('%s\n'%(mut_phen,))
                if mut_phen != target:
                    # NOTE(review): calc_s folds mut_gtype a second time.
                    inv_sel.append(1.0/calc_s(mut_gtype,target))
    if not inv_sel:
        return float('nan'),inv_sel
    return len(inv_sel)/sum(inv_sel),inv_sel

def sample_optimal(f,target):
    '''
    Draws one genotype at random from the individuals in run file f whose
    recorded shape equals target.

    The first four lines of the file are skipped; the rest is read in pairs of
    lines, a genotype followed by a comma-separated record whose field 0 is
    the shape and field 3 the integer number of individuals. Each matching
    genotype enters the pool once per individual, so the draw is weighted by
    abundance.

    Returns a one-element list holding the sampled genotype. Raises ValueError
    (from random.sample) when no record matches target.
    '''
    # "with" closes the handle; the original left the file open.
    with open(f) as run_file:
        dat=run_file.readlines()[4:]
    optimal=[]
    for i in range(0,len(dat),2):
        fields=dat[i+1].split(',')
        if fields[0]==target:
            optimal.extend([dat[i].strip()]*int(fields[3]))
    return random.sample(optimal,1)
    
# Driver: pick one genotype with the target shape from a single run file,
# print it with its length, then compute the harmonic-mean deleterious effect
# over its single-point mutants (harmonic_del also writes mut_shape_only.txt).
f='pymut_5_run_5000'
gtype=sample_optimal(f,target)[0]
print gtype,len(gtype)
#harmonic,inv_sel=harmonic_del(inigt,target)
harmonic,inv_sels=harmonic_del(gtype,target)
# The string literal below is a disabled batch pipeline: extract the '_5000'
# files from a tarball, run get_f_dat(15), write fdel_avgs.csv and delete the
# extracted files. As a bare string expression it never executes.
# NOTE(review) before re-enabling:
#   - `rep` is only assigned in the commented-out line near the top of the file;
#   - `fl.find('_5000') is not int(-1)` compares integers by identity, use !=;
#   - the CSV header names 6 columns while each data row writes 7 values.
'''
fz=t.open('p1_g1_ml2_r%i.tar.gz'%rep)
fnames=fz.getnames()
mnames=[fl for fl in fnames if fl.find('_5000') is not int(-1)]

for nam in mnames:
    fz.extract(nam)
fz.close()
a=tm.time()
dat,fdel_tot,adel=get_f_dat(15)
print tm.time()-a
#print dat.keys()
f=open('fdel_avgs.csv','w')
f.write('U,fdel_s1,fdel_s2,fdel_s3,fdel_tot,adel_avg\n')
for i in range(1,16):
        if i in dat:
            f.write('%i,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n'%(i,ulist[i-1],dat[i][0],dat[i][1],dat[i][2],fdel_tot[i],adel[i]))
        else:
            f.write('%i,%.3f,NA,NA,NA,NA,NA\n'%(i,ulist[i-1]))
f.close()

for i in range(1,16):
    try:
        os.remove('pymut_%i_run_5000'%i)
    except OSError:
        continue
'''
# Final pyfold call of the script; presumably releases memory held by the
# folding library -- confirm against pyfold.
pf.pyfreemem()
