# -*- coding: utf-8 -*-
import pyfold as pf
import tarfile as t
import os
import numpy as np
# Data directory path; not referenced anywhere in the visible part of this script.
dpath='/home/thomas/pymut/pymut_dat/'
import math
import sys
import time as tm
# Replicate number taken from the first command-line argument; it selects which
# 'p1_g1_mlhrd-<rep>.tar.gz' archive is opened further down.
rep=int(sys.argv[1])
# RNA sequence over A/C/G/U (presumably the initial genotype -- confirm); in the
# visible code only its length is used.
inigt='UUGGUUAUAAUUUCGCUGGGUGGCGCCCCAUAUGGCGAACUGCAUCAAUGUGACUGUCGAAGGUCGGGACCGAUAUAGACGUUGGGUACCUCUAACCUG'
# Target structure string that folded shapes are compared against with pf.hamming.
targ='..(((((....(((((((.((((....)))).))))))).(((.(((((((..((((....(((....)))...))))))))))))))....)))))..'
# Sequence length; used to normalise Hamming distances and mutant counts.
glen=len(inigt)
# Bases substituted at each position when enumerating point mutants.
cods=['A','C','G','U']
# Exponent applied to the normalised Hamming distance in getf.
beta=1.0
# Fitness deducted per mismatching position in getfl.
linc=2.5
def mutlevels(maxfit,minfit,steps):
    """Return `steps` evenly spaced levels running from maxfit down to minfit.

    maxfit -- value of the first (largest) level.
    minfit -- value of the last (smallest) level.
    steps  -- number of levels to produce.

    Returns a list in descending order. With steps == 1 there is no spacing
    to compute, so the single level returned is maxfit (the value every
    longer list starts with). With steps == 0 the list is empty.
    """
    if steps == 1:
        # Avoids the division by (steps - 1) == 0 below.
        return [float(maxfit)]
    # float() keeps the spacing exact for integer arguments under Python 2,
    # where `/` on two ints truncates.
    d = float(maxfit - minfit) / (steps - 1)
    return [minfit + i * d for i in range(steps - 1, -1, -1)]
# Fifteen evenly spaced levels from .95 down to .01.
expected_fit=mutlevels(.95,.01,15)
# Negative natural log of each level; written as the second field of every
# row in fdel_avgs.csv (presumably the mutation rate U -- confirm).
ulist=[(-math.log(x)) for x in expected_fit]
def getf(gtype):
    """Fold `gtype` and score its shape against the target structure.

    The score is 1 / (0.01 + (hamming / glen) ** beta), where hamming is
    pf.hamming of the folded shape and targ.

    Returns a (score, folded_shape) tuple.
    """
    structure = pf.pyfold(gtype)
    mismatch_fraction = float(pf.hamming(structure, targ)) / (glen)
    score = 1.0 / (.01 + mismatch_fraction ** beta)
    return score, structure
def getfl(gtype):
    """Fold `gtype` and score its shape with a linear penalty.

    The score starts at 100 and loses `linc` for every unit of pf.hamming
    distance between the folded shape and targ; scores that are not
    positive are reported as 0.

    Returns a (score, folded_shape) tuple.
    """
    structure = pf.pyfold(gtype)
    score = 100 - linc * pf.hamming(structure, targ)
    if score > 0:
        return score, structure
    return 0, structure
def get_ham(gtype):
    """Return the pf.hamming distance between the fold of `gtype` and targ.

    gtype -- sequence string passed to pf.pyfold.

    The folded shape, not the raw sequence, is what gets compared with the
    target structure, matching what getf and getfl do. The previous version
    folded the sequence but then compared `gtype` itself with targ.
    """
    gshape = pf.pyfold(gtype)
    return pf.hamming(gshape, targ)
# Three times the sequence length, i.e. three alternative bases per position;
# not referenced in the visible part of this script.
totpos=3*len(inigt)
# Called once before any sequence is folded (presumably initialises pyfold's
# folding parameters -- confirm against the pyfold module).
pf.fold_params()
def _min_del_mutant_count(gtype):
    """Count one-base substitutions of `gtype` whose get_ham value is exactly 2."""
    hits = 0
    for k in range(glen):
        for cod in cods:
            # Compare by value; `is` on strings only tests object identity.
            if gtype[k] == cod:
                continue
            if get_ham(gtype[:k] + cod + gtype[k + 1:]) == 2:
                hits += 1
    return hits


def get_f_dat(urange):
    """Compute fdel_s for each 'pymut_<j>_run_5000' file, j = 1..urange.

    Each file is read from the current directory. Its first four lines are
    skipped; the rest are taken as pairs of lines: a sequence, then a
    comma-separated record whose field 0 is the folded shape and field 3 the
    number of individuals carrying that sequence.

    For every sequence whose recorded shape equals targ, all 3 * glen
    one-base substitutions are evaluated with get_ham, and those at distance
    exactly 2 are counted, weighted by the number of individuals. Per the
    original author's note, fdel_s is the "mutation rate of minimum
    deleterious size". The weighted count is divided by
    3 * glen * (individuals on target).

    urange -- highest file index to look for.

    Returns a dict mapping j to fdel_s. Indices whose file is missing or
    unreadable, or whose file holds no individuals with the target shape,
    are left out. A trailing unpaired line in a file is ignored.

    One summary line (index, on-target individuals, weighted count) is
    printed per file that was read.
    """
    results = {}
    for j in range(1, urange + 1):
        try:
            # `with` releases the handle even if reading fails part-way.
            with open('pymut_%i_run_5000' % j) as f:
                lines = f.readlines()[4:]
        except IOError:
            continue

        totindivs = 0
        fdel_s = 0.0
        # Pair each sequence line with the record line that follows it.
        for gline, info in zip(lines[0::2], lines[1::2]):
            fields = info.strip().split(',')
            if fields[0] != targ:
                continue
            indivs = int(fields[3])
            totindivs += indivs
            fdel_s += indivs * _min_del_mutant_count(gline.strip())

        print('%s %s %s' % (j, totindivs, fdel_s))
        if totindivs:
            # Keyed by the file index so the caller can look results up by j.
            results[j] = fdel_s / (3 * glen * totindivs)
    return results


# Unpack this replicate's '_5000' members from its archive into the
# current directory.
fz=t.open('p1_g1_mlhrd-%i.tar.gz'%rep)
try:
    fnames=fz.getnames()
    # Substring test; the previous `find(...) is not int(-1)` compared
    # integers by identity rather than by value.
    mnames=[fl for fl in fnames if '_5000' in fl]
    for nam in mnames:
        fz.extract(nam)
finally:
    fz.close()

# One file index per entry of ulist.
nlevels=len(ulist)
try:
    a=tm.time()
    dat=get_f_dat(nlevels)
    # Elapsed wall-clock seconds for the analysis.
    print(tm.time()-a)
    # `with` guarantees the CSV is flushed and closed.
    with open('fdel_avgs.csv','w') as f:
        # NOTE(review): the header names two columns while each row carries
        # three (index, ulist value, fdel_s). Left unchanged on purpose: R's
        # read.csv treats such a first column as row names -- confirm that
        # this is what the downstream consumer expects.
        f.write('U,fdel_s\n')
        for i in range(1,nlevels+1):
            if i in dat:
                f.write('%i,%.3f,%.3f\n'%(i,ulist[i-1],dat[i]))
            else:
                f.write('%i,%.3f,NA\n'%(i,ulist[i-1]))
finally:
    # Remove the extracted files and release pyfold's memory even when the
    # analysis above fails.
    for i in range(1,nlevels+1):
        try:
            os.remove('pymut_%i_run_5000'%i)
        except OSError:
            continue
    pf.pyfreemem()
