import dataread
import ols
import sys
from numpy import *
from numpy.random import randn

# Source text of the generated model script, used as a %-format string.
# main() prefixes it with "# rsid:" comment lines and fills the single %r
# placeholder with the value returned by load_log_reg().
#
# The generated script reads genotype values from its command line, prepends
# a constant 1 (presumably the intercept term -- confirm against ols), takes
# the inner product with the substituted array and prints
# 1 / (1 + exp(-inner_product)).
#
# NOTE: because this string goes through the % operator, any literal percent
# sign added to it must be written as %%.
# NOTE: the generated script uses the Python 2 print statement.
template = """
##### This is an automatically generated model file created by logistic
##### regression on the SNPs listed above.

from numpy import *
from sys import argv

genotypes = argv[1:]

genotype_list = [1] + [int(genotype) for genotype in genotypes]
genotype_array = array(genotype_list)

##### PVALS GO HERE #####
pvals = %r

val = -1*inner(genotype_array, pvals)
risk = 1/(1+exp(val))
print risk
"""

# turn a string of digits into an integer array for regression
def str_to_arr(phenotype):
    """Return a 1-D array holding ``int(ch)`` for every character of the input.

    For example ``"012"`` becomes ``array([0, 1, 2])``.  Despite the parameter
    name, load_log_reg() in this file passes genotype keys to this function.
    A character that ``int`` cannot parse raises ``ValueError``.
    """
    digits = list(map(int, phenotype))
    return array(digits)

# load the regression for a particular phenotype
def load_log_reg(phenotype):
    """Regress the logit of P(phenotype | genotype) on the genotype digits.

    For every genotype key in ``dataread.datalist[phenotype]`` the conditional
    probability from ``dataread.cond_prob`` is converted to a logit; the
    genotype strings (via ``str_to_arr``) form the rows of the design matrix.
    The pair is handed to ``ols.ols`` with variable names ``x0 .. x<snpnum-1>``.

    Parameters:
        phenotype: index/key into ``dataread.datalist`` selecting the
            phenotype to model.

    Returns:
        The ``p`` attribute of the fitted ``ols.ols`` model.
        NOTE(review): if ``ols`` is the SciPy-cookbook OLS class, ``.p`` holds
        the coefficient p-values and ``.b`` the coefficients -- confirm that
        ``.p`` is really what the generated model should multiply by.

    Raises:
        ValueError: if no genotypes are recorded for ``phenotype`` (the
            original code failed with an UnboundLocalError in that case).
    """
    genotypes = list(dataread.datalist[phenotype].keys())
    if not genotypes:
        raise ValueError(
            "no genotypes recorded for phenotype %r; cannot fit a regression"
            % (phenotype,))

    # Collect rows in plain lists and build the arrays once at the end;
    # growing a numpy array with append() copies it on every iteration.
    rows = []
    logits = []
    for genotype in genotypes:
        # find the conditional probability of the phenotype given the key
        prob = dataread.cond_prob(phenotype, genotype)

        # calculate the logit of the probability
        # NOTE(review): a probability of exactly 0 or 1 produces -inf/+inf
        # here (with a numpy warning); decide whether such genotypes should
        # be clipped or dropped before fitting.
        logits.append(log(prob) - log(1 - prob))
        rows.append(str_to_arr(genotype))

    # one row per genotype, one column per digit of the genotype string
    xarr = vstack(rows)
    yarr = array(logits)

    xlist = ['x' + str(i) for i in range(dataread.snpnum)]
    #print >>sys.stderr, xarr
    mymodel = ols.ols(yarr, xarr, 'yarr', xlist)
    return mymodel.p

# start of the code
def main():
    """Read the data file named on the command line and print a model file.

    The file name is taken from ``sys.argv[1]``.  The data is loaded through
    ``dataread.load_data``, the regression for phenotype 0 is fitted with
    ``load_log_reg`` and the result is substituted into ``template``.  The
    model text, preceded by one ``# rsid:`` line per entry of
    ``dataread.header`` except the last, is written to standard output.
    """
    # read in filename from command line and load it
    filename = sys.argv[1]
    # The return value is not needed here; later code reads the data through
    # the dataread module (presumably load_data fills dataread.datalist,
    # dataread.header and dataread.snpnum -- confirm in dataread).
    dataread.load_data(filename, 2)
    pvals = load_log_reg(0)

    # create Trait-o-matic model file
    rsid_lines = ["# rsid:%s\n" % snp_id for snp_id in dataread.header[:-1]]
    # Wrap pvals in a 1-tuple so it is always formatted as a single value,
    # even if the regression result happens to be a tuple.
    output = "".join(rsid_lines) + template % (pvals,)
    # Parenthesised form prints identically under the Python 2 print statement.
    print(output)

# run the code
if __name__ == "__main__":
    # sys.argv[0] is always the script name, so a data file name was given
    # only when argv has at least two entries.
    if len(sys.argv) < 2:
        print("please specify a file name")
        sys.exit(1)
    main()
