# rm(list=ls())

#
# McGurk power analysis
#

# Work from the project directory so that the relative paths used below (the
# sourced helper scripts, and the CSV/RDS files read and written in main())
# resolve against it.
# NOTE(review): hard-coded, machine-specific path -- adjust before running on
# another machine.
setwd('~/Desktop/mcg_power_analysis/')
# Load the helper scripts. Presumably these define the simulation/analysis
# functions (e.g. do_sim, analyze_sim_results) and the plotting functions
# (e.g. pdf.pgb, ph.pdf) that main() calls -- verify against those files.
source('power_analysis_functions.R')
source('power_analysis_graphs.R')

# make this a function so it isn't accidentally run on sourcing
# Runs the McGurk power-analysis simulations in order: checks of individual
# effect sizes, example plots at several sample sizes, the Figure 2 long-run
# comparison (N=30 vs. N=300), the Figure 3 sweeps (delta, number of stimuli,
# number of trials, each across the sample sizes in `Ns`) and the spot checks
# for the results section.
#
# Takes no arguments. Side effects: reseeds the global RNG, writes CSV files
# (and an RDS cache) into the working directory, calls the plotting helpers
# with PDF file names, and starts fork clusters (stopped again on exit).
# Returns the value of the final analyze_sim_results() call (N=450 check).
#
# The helpers used here (clip, get_fusion_df, run_one_sim, do_sim,
# analyze_sim_results, pdf.pgb, ph.pdf, plot.power_hist, do_sleep) are not
# defined in this file; presumably they come from the sourced
# power_analysis_*.R scripts.
# NOTE(review): values of intermediate analyze_sim_results() calls are not
# auto-printed when main() is called as a function; the body looks like it is
# meant to be stepped through interactively -- confirm.
main <- function() {
    # setup initial simulation parameters
    simulation_parameters <- list(
        N=NA,
        n_stimuli=1,
        n_trials=10,
        group_type='unpaired',
        delta=function(pF) pF + 0.1725,
        limit=function(pF) clip(pF, limits = c(0.05, 0.95))
    )

    # sample sizes swept in the Figure 3 simulations
    Ns <- c(20, 30, 40, 50, 100, 150, 200, 300)

    # deltas[i] is the shift added to pF; delta_to_pop_round[i] is the value
    # passed to analyze_sim_results() as the population effect for that shift
    deltas <-  c(.0945, .1725, .33, .5)
    delta_to_pop_round <- c(5, 10, 20, 30)/100

    pop_df <- get_fusion_df()

    # find a 3% effect
    set.seed(122)
    simulation_parameters$delta <- function(x) x + 0.06125
    simulation_parameters$N <- 35*1000
    simulation_parameters$n_stimuli <- 200
    # bind the result so the check below has an object to analyze
    # NOTE(review): assumes the value returned with analyze=TRUE is what
    # analyze_sim_results() expects -- confirm against run_one_sim
    d03 <- run_one_sim(simulation_parameters, pop_df, analyze=TRUE)

    # test to ensure the given delta produces an actual difference of 3%
    analyze_sim_results(d03, pop_delta = 0.03)

    # 5% effect, N=36
    set.seed(122)
    simulation_parameters$N <- 36
    simulation_parameters$n_stimuli <- 1
    simulation_parameters$delta <- function(x) x + deltas[1]
    d05 <- do_sim(simulation_parameters, chunk_size = 5000, n.rep = 1)

    # the n.rep reported here is chunk_size * n.rep
    # NOTE(review): pop_delta is 0.1 although deltas[1] maps to
    # delta_to_pop_round[1] = 0.05 -- confirm which value is intended
    analyze_sim_results(d05, pop_delta = 0.1)

    # get population level effect by using n=35,000 and n_stim=200
    set.seed(122)
    simulation_parameters$N <- 35*1000
    simulation_parameters$n_stimuli <- 200
    population_eff <- run_one_sim(simulation_parameters, pop_df, analyze=FALSE)
    pdf.pgb(population_eff, fname='n3ht_ex_plot.pdf')

    # get n=300 effect
    set.seed(122)
    simulation_parameters$N <- 300
    simulation_parameters$n_stimuli <- 'McGr2'
    n300_eff <- run_one_sim(simulation_parameters, pop_df, summary=TRUE)

    # plot.grp_bplot(n300_eff)
    pdf.pgb(n300_eff, fname='n300_ex_plot.pdf')

    # get n=30 effect (n_stimuli is still 'McGr2' from the n=300 example)
    set.seed(122)
    simulation_parameters$N <- 30
    n30_eff <- run_one_sim(simulation_parameters, pop_df, summary=TRUE)
    # plot.grp_bplot(n30_eff)
    pdf.pgb(n30_eff, fname='n30_ex_plot.pdf')


    ## Figure 2
    # get the long-run behavior of N=30 vs. N=300
    simulation_parameters$n_stimuli <- 1
    simulation_parameters$N <- 300
    # Reuse the cached N=300 run when it exists; otherwise simulate once and
    # cache it. This avoids running the simulation only to overwrite its
    # result, and avoids failing when the cache file is missing.
    # NOTE(review): unlike the N=30 run below, no seed is set before this
    # simulation -- add one if the cached result must be reproducible.
    n300_cache <- 'n300.RDS'
    if (file.exists(n300_cache)) {
        n300 <- readRDS(n300_cache)
    } else {
        n300 <- do_sim(simulation_parameters, chunk_size = 1000, n.rep = 5)
        saveRDS(n300, n300_cache)
    }

    analyze_sim_results(n300, pop_delta = 0.1)
    #choose ymax wisely, based on n.rep and chunk_size
    # plot.power_hist(n300, .1, ymax=300)

    set.seed(122)
    simulation_parameters$N <- 30
    n30 <- do_sim(simulation_parameters, chunk_size = 1000, n.rep = 5)
    analyze_sim_results(n30, pop_delta = 0.1)
    plot.power_hist(n30, .1, ymax=300)

    # write out the figures
    ph.pdf(n300, .1, fname='n300_power_hist.pdf')
    ph.pdf(n30, .1, fname='n30_power_hist.pdf')


    # Figure 3 is the effect of INC delta, n_stim, and n_trials (all across increasing sample size)

    # effect of increasing delta
    simulation_parameters <- list(
        N=NA,
        n_stimuli=1,
        n_trials=10,
        group_type='unpaired',
        delta=function(pF) return (NA),
        limit=function(pF) clip(pF, limits = c(0.05, 0.95))
    )
    set.seed(09292017)
    results <- NULL

    # if restarting, then load the existing dataset
    # results <- as.matrix(read.csv('power_results_increasing_delta20.csv'))
    # iterate by index so the matching population effect is looked up by
    # position rather than by comparing floating-point values
    for(i in seq_along(deltas)) {
        delta <- deltas[i]
        # the closure reads `delta` from this function's environment at call
        # time; do_sim() is called before `delta` is reassigned
        simulation_parameters$delta <- function(x) x + delta
        cat('---- delta=', delta, '----\n')
        for(n in Ns) {
            simulation_parameters$N <- n
            cat('\tN =', n, '\t')
            # choosing chunk_size and n.rep here to get sufficient simulations but not tax the CPU too hard
            res <- do_sim(simulation_parameters, chunk_size = 125, n.rep = 120)
            results <- rbind(results, analyze_sim_results(res, delta_to_pop_round[i]))
            cat('Done with', nrow(results), 'of', length(deltas)*length(Ns), '\n')

            # the file is overwritten each time with everything accumulated
            # so far, so intermediate results survive an interrupted run
            write.csv(results, file='power_results_increasing_delta30.csv', row.names = FALSE)
            do_sleep(3)
        }
    }

    # Impact of increasing stimuli
    results <- NULL
    # results <- as.matrix(read.csv('power_results_increasing_stimuli.csv'))
    simulation_parameters$delta <- function(x) x + deltas[2]
    stim_counts <- c(2, 4, 8, 16)
    for(n_stim in stim_counts) {
        simulation_parameters$n_stimuli <- n_stim
        cat('---- n_stim =', n_stim, '----\n')
        for(n in Ns) {
            simulation_parameters$N <- n
            cat('\tN =', n, '\t')
            res <- do_sim(simulation_parameters, chunk_size = 125, n.rep = 120)
            results <- rbind(results, analyze_sim_results(res, .1))
            cat('Done with', nrow(results), 'of', length(stim_counts)*length(Ns), '\n')

            gc()
            #the 2 here is because we're using deltas[2]
            write.csv(results, file='power_results_increasing_stimuli2.csv', row.names = FALSE)
            do_sleep(3)
        }
    }

    # Impact of increasing # trials
    results <- NULL
    # results <- as.matrix(read.csv('power_results_increasing_trials.csv'))
    simulation_parameters$delta <- function(x) x + deltas[2]
    simulation_parameters$n_stimuli <- 1
    trial_counts <- c(100, 1000)
    for(n_trials in trial_counts) {

        simulation_parameters$n_trials <- n_trials
        cat('---- # Trials =', n_trials, '----\n')
        for(n in Ns) {
            simulation_parameters$N <- n
            cat('\tN =', n, '\t')
            res <- do_sim(simulation_parameters, chunk_size = 125, n.rep = 120)
            results <- rbind(results, analyze_sim_results(res, .1))
            # total is derived from the swept values (two trial counts)
            cat('Done with', nrow(results), 'of', length(trial_counts)*length(Ns), '\n')

            #the 2 here is because we're using deltas[2]
            write.csv(results, file='power_results_increasing_trials2.csv', row.names = FALSE)
            do_sleep(3)
        }
    }

    # # # demonstrate various combinations -- see result section:
    # Experimental Power: Sensitivity to different experimental manipulations

    # The spot checks below pass a cluster to do_sim(), so it has to exist
    # before the first of them. Whichever cluster is live when main() exits
    # (normally or through an error) is shut down by the on.exit handler.
    cl <- NULL
    on.exit(if (!is.null(cl)) parallel::stopCluster(cl), add = TRUE)
    cl <- parallel::makeForkCluster(8)
    # time-based seed: these parallel runs are not reproducible across calls
    parallel::clusterSetRNGStream(cl, as.integer(as.POSIXct(Sys.time())))

    # power for pop_eff <- 0.1
    # what sample size is needed for power=0.8
    simulation_parameters$n_stimuli <- 1
    simulation_parameters$delta <- function(x) x + deltas[2]
    simulation_parameters$N <- 450
    analyze_sim_results(do_sim(simulation_parameters, chunk_size = 200, n.rep=20, cl=cl), pop_delta = 0.1)[,'power']

    # what if we increase stimuli at pop_eff = 10%?
    simulation_parameters$n_stimuli <- 2
    simulation_parameters$N <- 300
    analyze_sim_results(do_sim(simulation_parameters, chunk_size = 200, n.rep=20, cl=cl), pop_delta = 0.1)[,'power']

    # go to 16 stimuli
    simulation_parameters$n_stimuli <- 16
    simulation_parameters$N <- 205
    analyze_sim_results(do_sim(simulation_parameters, chunk_size = 200, n.rep=20, cl=cl), pop_delta = 0.1)[,'power']

    # back to 1 stimulus, but 1000 trials
    simulation_parameters$n_stimuli <- 1
    simulation_parameters$n_trials <- 1000
    simulation_parameters$N <- 405
    analyze_sim_results(do_sim(simulation_parameters, chunk_size = 200, n.rep=20, cl=cl), pop_delta = 0.1)[,'power']

    # what if we change the number of trials to be small? NB: increase delta
    simulation_parameters$delta <- function(pF) pF + deltas[3]
    simulation_parameters$n_trials <- 2
    simulation_parameters$N <- 150
    # swap the 8-worker cluster for a 4-worker one; clear `cl` in between so
    # the exit handler never tries to stop an already-stopped cluster
    parallel::stopCluster(cl)
    cl <- NULL
    gc()
    cl <- parallel::makeForkCluster(4)
    parallel::clusterSetRNGStream(cl, as.integer(as.POSIXct(Sys.time())))
    analyze_sim_results(do_sim(simulation_parameters,
                               chunk_size = 300, n.rep=50, cl=cl), pop_delta = delta_to_pop_round[3])


    # for pop_eff - 20%
    simulation_parameters$delta <- function(x) x + deltas[3]
    # the assignment inside the anonymous function changes only its local copy
    # of simulation_parameters, not the one in main()
    # NOTE(review): power_at_n is not used afterwards in this function
    power_at_n <- sapply(c(105, 110, 115), function(N) {
        simulation_parameters$N <- N
        res <- analyze_sim_results(do_sim(simulation_parameters, chunk_size = 1000, n.rep=1, cl=cl))
        res[,'power']
    })


    # for pop_eff = 10%, what is needed sample size for 80% power
    simulation_parameters <- list(
        N=450,
        n_stimuli=1,
        n_trials=10,
        group_type='unpaired',
        delta=function(pF) pF + deltas[2],
        limit=function(pF) clip(pF, limits = c(0.05, 0.95))
    )

    set.seed(122)
    # last expression: this is the value main() returns
    analyze_sim_results(do_sim(simulation_parameters, chunk_size = 300, n.rep=50))
    # n.rep   N n_stim n_trials is_paired  delta pop_eff     power mean Eff Est abs Eff Est mean_abs_ratio p(wrong_sign) p(sig > pop_eff) mean non sig est p(non-sig < pop_eff)
    # 15000 450      1       10         0 0.1725     0.1 0.8048667    0.1134637   0.1134637       1.134637             0        0.6384494       0.04492305                    1

}
