import csv
import ols
import sys
from numpy import *
from numpy.random import randn

# Module-level tables shared by the functions below: load_data fills
# them in and cond_prob reads them.

# SNP count; stays 0 until load_data assigns it
snpnum = 0

# one dictionary per phenotype (indexed by the phenotype's integer id),
# each mapping a genotype key to the number of datapoints with that
# genotype in which the phenotype is expressed
datalist = list()

# genotype key -> total number of datapoints seen with that genotype
totaldic = dict()

# the conditional probability of a phenotype given a genotype
def cond_prob(phenotype, genotype):
    """Return the empirical probability of `phenotype` given `genotype`.

    The value is the count stored in datalist[phenotype][genotype]
    divided by the total count in totaldic[genotype].

    phenotype -- integer index into datalist
    genotype  -- genotype key as stored in datalist / totaldic

    Returns a float ratio, or 0 when the genotype has no entry for this
    phenotype. Raises IndexError if `phenotype` is not a valid index
    into datalist.
    """
    # `in` replaces dict.has_key(), which no longer exists on Python 3;
    # the membership test behaves identically on Python 2.
    if genotype in datalist[phenotype]:
        # 1.0 * forces float division on Python 2 as well
        return 1.0 * datalist[phenotype][genotype] / totaldic[genotype]
    else:
        return 0

# load data from a csv file with a specified number of phenotypes
# store totaldic and datalist so probabilities can be calculated
# also deletes any old stored info

def load_data(filename, numphenotypes):
    """Rebuild datalist and totaldic from the rows of a CSV file.

    The first row is stored in the module-level `header`. In every other
    non-empty row the last column is read as the integer phenotype index
    and all preceding columns are concatenated to form the genotype key.

    filename      -- path of the CSV file to read
    numphenotypes -- number of per-phenotype dictionaries to create in
                     datalist

    Returns [datalist, totaldic], the same module-level objects that
    cond_prob reads. Also sets the module-level `snpnum` to the length
    of the last genotype key read (0 if the file has no data rows).

    Raises ValueError if a row's last column is not an integer and
    IndexError if it is not a valid index into datalist.
    """
    global header, snpnum

    # Drop whatever an earlier call stored. The containers are emptied in
    # place so that references handed out earlier stay valid.
    del datalist[:]
    totaldic.clear()
    datalist.extend({} for _ in range(numphenotypes))

    # The csv module wants a binary file on Python 2 but a text file
    # opened with newline="" on Python 3.
    if sys.version_info[0] >= 3:
        csvfile = open(filename, "r", newline="")
    else:
        csvfile = open(filename, "rb")

    genotype = None
    # the with-block guarantees the file is closed, even on a bad row
    with csvfile:
        for rownum, row in enumerate(csv.reader(csvfile)):
            # store the header as the first row
            if rownum == 0:
                header = row
                continue

            # csv.reader yields [] for blank lines; nothing to count
            if not row:
                continue

            # Split on columns rather than on characters so that a
            # phenotype index with more than one digit is read whole.
            genotype = "".join(row[:-1])
            phenotype = int(row[-1])

            # count the genotype for this phenotype and overall
            counts = datalist[phenotype]
            counts[genotype] = counts.get(genotype, 0) + 1
            totaldic[genotype] = totaldic.get(genotype, 0) + 1

    # NOTE(review): this equals the number of SNP columns only when each
    # genotype column holds a single character -- confirm with the data.
    snpnum = len(genotype) if genotype is not None else 0
    return [datalist, totaldic]
