library(AvonData)
library(lubridate)
library(plyr)
library(EpiGLM)

data(AvonOutbreaks)
data(caseRecordsFull)
xx <- caseRecordsFull

notes <- list()

## subset to cases attributed in whole or in part to norovirus
xx <- xx[grep("norovirus", xx$Organism),]

## Correct putative data-entry errors
ind <- which(mapply(function(x, y) x==76 & y=="2002-06-10",
                    xx$OutbreakNumber, xx$OnsetDate))
xx$OnsetDate[ind] <- "2002-10-06"
ind <- which(mapply(function(x, y) x==76 & y=="2002-04-10",
                    xx$OutbreakNumber, xx$OnsetDate))
xx$OnsetDate[ind] <- xx$X1stSymptomFreeDate[ind] <- "2002-10-04"
ind <- which(mapply(function(x, y) x==76 & y=="2002-11-15",
                    xx$OutbreakNumber, xx$X1stSymptomFreeDate))
xx$X1stSymptomFreeDate[ind] <- "2002-10-15"

## Remove dates that describe an impossible symptomatic
badDates <- with(xx, which(OnsetDate > X1stSymptomFreeDate))
xx$X1stSymptomFreeDate[badDates] <- NA
xx$OnsetDate[badDates] <- NA
notes$n_bad_dates <- length(badDates)
rm(badDates)

# Remove dates where CaseCode is missing
missing_case_code <- is.na(xx$CaseCode)
xx <- subset(xx, !missing_case_code)
notes$n_missing_case_code <- sum(missing_case_code)
rm(missing_case_code)

# Make less specific case code
renamer <- function(x) {
  ret <- switch(x, "Hosp staff"="staff", "NH staff"="staff",
                "Patient"="patient", "Resident"="patient")
  if(is.null(ret)) ret <- NA
  ret
}
renamer <- Vectorize(renamer)
xx$CaseType <- renamer(xx$CaseCode)
rm(renamer)
xx$outbreak_code <- with(xx, paste(OutbreakNumber, CaseType, sep="_"))

# Tabulate missing data 
xx$OnsetFreeingRep <- with(xx,interaction(!is.na(OnsetDate),
                                          !is.na(X1stSymptomFreeDate)))
xx2 <- ddply(xx, .(OutbreakNumber, CaseType), summarise,
             outbreak_code=outbreak_code[1],
             OBSize=length(OutbreakNumber),
             bothDates=sum(OnsetFreeingRep=="TRUE.TRUE"),
             onsetOnly=sum(OnsetFreeingRep=="TRUE.FALSE"),
             freeingOnly=sum(OnsetFreeingRep=="FALSE.TRUE"),
             noDates=sum(OnsetFreeingRep=="FALSE.FALSE"),
             initialInf=sum(OnsetDate == min(OnsetDate, na.rm=TRUE), na.rm=TRUE))
xx2$propFreeImp <- with(xx2, onsetOnly/(onsetOnly + bothDates))
xx2$propOnsetNA <- with(xx2, (noDates + freeingOnly)/(onsetOnly + bothDates))

## Subset according to tolerance for missing data
maxPONA <- 0.07
maxPRNA <- 0.55
xx2 <- subset(xx2, propFreeImp<=maxPRNA & propOnsetNA <= maxPONA)
xx <- subset(xx, outbreak_code %in% xx2$outbreak_code)

## Hack to include all types of susceptibles in all outbreaks in model
## matrix. Without doing this, simulator would sometimes simulate
## outbreaks with only the types observed to be infected. That does
## not agree with our model, which posits that the number of initial
## susceptibles should be a function of predictor variables. 
foo <- expand.grid(OutbreakNumber=unique(xx2$OutbreakNumber),
                   CaseType=unique(xx2$CaseType), stringsAsFactors=FALSE)
foo$outbreak_code <- with(foo, paste(OutbreakNumber, CaseType, sep="_"))
complement <- setdiff(foo$outbreak_code, xx2$outbreak_code)
foo <- subset(foo, foo$outbreak_code %in% complement)
foo <- cbind(foo, initialInf=0)
xx2 <- rbind.fill(xx2, foo)

# Get predictor vars
starts <- ddply(xx, "OutbreakNumber", summarize,
                month=min(month(OnsetDate), na.rm=TRUE))
starts$season <- NA
starts$season[starts$month %in% c(10:12, 1:3)] <- "fall-winter"
starts$season[starts$month %in% 4:9] <- "spring-summer"
key <- match(xx2$OutbreakNumber, starts$OutbreakNumber)
xx2$season <- starts$season[key]
rm(starts)
key <- match(xx2$OutbreakNumber, AvonOutbreaks$OutbreakNumber)
xx2$facility <- AvonOutbreaks$HomeHos[key]
AvonOutbreaks$cap <- with(AvonOutbreaks,
                          ifelse(is.na(UnitBeds), MaxResidents,
                                 UnitBeds))
AvonOutbreaks$medians <- with(AvonOutbreaks,
                              ave(cap, HomeHos,
                                  FUN=function(x) median(x, na.rm=TRUE)))
AvonOutbreaks$is_small <- with(AvonOutbreaks, cap < medians)
xx2$is_small <- AvonOutbreaks$is_small[key]
rm(key)

## Fitting

xx2$scale <- 1
xx2$CaseType <- as.factor(xx2$CaseType)
xx2$season <- as.factor(xx2$season)

xx3 <- xx2
xx3$propOnsetNA <- 0
xx3$propFreeImp <- 0

##

paso <- get_data(static=xx3, mode="passthrough", case_records=xx)

ress1 <- get_est(job=list(id=-1), static=xx3, dynamic=paso, method="admb",
               gr_intercept="yes", gr_terms=c("set1"),
               limit_staff_to_one_day=FALSE)
ress1$coefnames
summary(ress1$fit$admb)

ress1t <- get_est(job=list(id=-2), static=xx3, dynamic=paso, method="admb",
               gr_intercept="yes", gr_terms=c("set1"),
               limit_staff_to_one_day=TRUE)
ress1t$coefnames
summary(ress1t$fit$admb)

library(BatchExperiments)

rm(reg)
system("rm -r simReg-files")

reg <- makeExperimentRegistry(id="simReg",
                              packages=c("EpiGLM", "plyr"))
repls <- 10

nn <- Sys.info()["nodename"]
if (nn == "phylocluster.ccbb.utexas.edu"){
  resources <- list(queue="wilke")
  chunk.size <- ceiling(18 * repls / 24)
} else {
  resources <- list()
  chunk.size <- 1
}

addProblem(reg, id="prob", static=xx3, dynamic=get_data_wrapper)

prob.design <- makeDesign("prob", exhaustive=list(floor_times=c(FALSE, TRUE),
                                    nreps=1, mode="sim",
                                    ip_dispersion=0.5,
                                    tr1=-5.78, tr2=1.912, tr3=0.484, tr4=-0.09, tr5=-1.01,
                                    gr1=-1.465935, gr2=0.98, gr3=0.008, gr4=0.00265, gr5=-0.86))


xx4 <- xx2
xx4$propFreeImp[is.na(xx4$propFreeImp)] <- 0.078
xx4$propOnsetNA[is.na(xx4$propOnsetNA)] <- 0.0014
addProblem(reg, id="prob2", static=xx4, dynamic=get_data_wrapper)

prob2.design <- makeDesign("prob2", exhaustive=list(floor_times=c(FALSE, TRUE),
                                    nreps=1, mode="sim",
                                    ip_dispersion=0.5,
                                    tr1=-5.78, tr2=1.912, tr3=0.484, tr4=-0.09, tr5=-1.01,
                                    gr1=-1.465935, gr2=0.98, gr3=0.008, gr4=0.00265, gr5=-0.86))


addAlgorithm(reg, id="est", fun=get_est)
alg.design <- makeDesign("est", exhaustive=list(gr_intercept="yes",
                                  gr_terms="set1", altfits=TRUE,
                                  limit_staff_to_one_day=FALSE))

addExperiments(reg, prob.designs=prob.design,
               algo.designs=alg.design, repls=repls)

addExperiments(reg, prob.designs=prob2.design,
               algo.designs=alg.design, repls=repls)

addExperiments(reg, prob.designs=prob.design,
               algo.designs=alg.design, repls=20, skip.defined=TRUE)


summarizeExperiments(reg)
submitJobs(reg, resources=resources)
 showStatus(reg)

resL <- reduceResultsList(reg)
fit2ll <- sapply(resL, function(x) {b <- x$fit2$admb$loglik; if(!is.null(b)) b else NA})
fit1ll <- sapply(resL, function(x) {b <- x$fit$admb$loglik; if(!is.null(b)) b else NA})
fitcanll <- sapply(resL, function(x) {b <- x$fitcan$admb$loglik; if(!is.null(b)) b else NA})

##

##

simo <- get_data_wrapper(job=list(id=1), static=xx3, mode="sim", 
                         ip_dispersion=0.5, case_records=NULL,
                         tr1=-5.59, tr2=0, tr3=0, tr4=0, tr5=0,
                         gr1=-1.31, gr2=0, gr3=0, gr4=0, gr5=0,
                         floor_times=FALSE, nreps=3, maxinftime="none")


ressim <- get_est(job=list(id=2), static=xx3, dynamic=simo, method="admb",
                  gr_intercept="no", gr_terms="scale",
                  tr_intercept="no", tr_terms="scale",
                  limit_staff_to_one_day=FALSE)

ressim$fit$admb


ressimbad <- get_est(job=list(id=2), static=xx3, dynamic=simo, method="admb",
                  gr_intercept="no", gr_terms="scale",
                  tr_intercept="no", tr_terms="scale",
                  limit_staff_to_one_day=TRUE)

ressimbad$fit$admb

simo <- get_data_wrapper(job=list(id=1), static=xx3, mode="sim", 
                         ip_dispersion=0.5, case_records=NULL,
                         tr1=-5.59, tr2=0, tr3=0, tr4=0, tr5=0,
                         gr1=-1.31, gr2=0, gr3=0, gr4=0, gr5=0,
                         floor_times=FALSE, nreps=10, maxinftime="limit2")

ressimbad <- get_est(job=list(id=2), static=xx3, dynamic=simo, method="admb",
                  gr_intercept="no", gr_terms="scale",
                  tr_intercept="no", tr_terms="scale",
                  limit_staff_to_one_day=FALSE)

ressimbad$fit$admb

ressim <- get_est(job=list(id=2), static=xx3, dynamic=simo, method="admb",
                  gr_intercept="no", gr_terms="scale",
                  tr_intercept="no", tr_terms="scale",
                  limit_staff_to_one_day=TRUE)

ressim$fit$admb


##

paso <- get_data(static=xx3, mode="passthrough", case_records=xx)

res <- get_est(static=xx3, dynamic=paso, method="admb",
               gr_intercept="no", gr_terms=c("scale"),
               limit_staff_to_one_day=FALSE)

resf <- get_est(static=xx3, dynamic=paso, method="admb",
               gr_intercept="yes", gr_terms="facility",
               limit_staff_to_one_day=FALSE)

ress <- get_est(job=list(id=1),static=xx3, dynamic=paso, method="admb",
               gr_intercept="yes", gr_terms=c("season"),
               limit_staff_to_one_day=FALSE)

resi <- get_est(static=xx3, dynamic=paso, method="admb",
               gr_intercept="yes", gr_terms=c("is_small"),
               limit_staff_to_one_day=FALSE)
resi$coefnames
summary(resi$fit$admb)

resc <- get_est(static=xx3, dynamic=paso, method="admb",
               gr_intercept="yes", gr_terms=c("CaseType"),
               limit_staff_to_one_day=FALSE)
resc$coefnames
summary(resc$fit$admb)

ress1 <- get_est(job=list(id=-1), static=xx3, dynamic=paso, method="admb",
               gr_intercept="yes", gr_terms=c("set1"),
               limit_staff_to_one_day=FALSE)
ress1$coefnames
summary(ress1$fit$admb)

ress1t <- get_est(job=list(id=-2), static=xx3, dynamic=paso, method="admb",
               gr_intercept="yes", gr_terms=c("set1"),
               limit_staff_to_one_day=TRUE)
ress1t$coefnames
summary(ress1t$fit$admb)


##

simo <- get_data_wrapper(job=list(id=1), static=xx3, mode="sim", 
                         ip_dispersion=0.5, case_records=NULL,
                         tr1=-5.59, tr2=1.93, tr3=0, tr4=0, tr5=0,
                         gr1=-1.31, gr2=1.04, gr3=0, gr4=0, gr5=0,
                         floor_times=FALSE, nreps=1)


ressim <- get_est(static=xx3, dynamic=simo, method="admb",
                  gr_intercept="yes", gr_terms="facility",
                  tr_intercept="yes", tr_terms="facility",
                  limit_staff_to_one_day=FALSE)

ressim$fit$admb

##

simo <- get_data_wrapper(static=xx3, mode="sim", 
                         ip_dispersion=0.5, case_records=NULL,
                         tr1=-5.59, tr2=0, tr3=.48, tr4=0, tr5=0,
                         gr1=-1.47, gr2=0, gr3=0.01, gr4=0, gr5=0,
                         floor_times=FALSE, nreps=10)


ressim <- get_est(static=xx3, dynamic=simo, method="admb",
                  gr_intercept="yes", gr_terms="is_small",
                  tr_intercept="yes", tr_terms="is_small",
                  limit_staff_to_one_day=FALSE)

ressim$fit$admb

##

simo <- get_data_wrapper(static=xx3, mode="sim", 
                         ip_dispersion=0.5, case_records=NULL,
                         tr1=-5.78, tr2=0, tr3=0, tr4=-0.09, tr5=0,
                         gr1=-1.465935, gr2=0, gr3=0, gr4=0.00265, gr5=0,
                         floor_times=FALSE, nreps=10)


ressim <- get_est(static=xx3, dynamic=simo, method="admb",
                  gr_intercept="yes", gr_terms="season",
                  tr_intercept="yes", tr_terms="season",
                  limit_staff_to_one_day=FALSE)

summary(ressim$fit$admb)

##

simo <- get_data_wrapper(static=xx3, mode="sim", 
                         ip_dispersion=0.5, case_records=NULL,
                         tr1=-5.59, tr2=0, tr3=0, tr4=0, tr5=-1.01,
                         gr1=-1.31, gr2=0, gr3=0, gr4=0, gr5=-0.86,
                         floor_times=FALSE, nreps=2)

ressim <- get_est(static=xx3, dynamic=simo, method="admb",
                  gr_intercept="yes", gr_terms="CaseType",
                  tr_intercept="yes", tr_terms="CaseType",
                  limit_staff_to_one_day=FALSE)

summary(ressim$fit$admb)

##

simo <- get_data_wrapper(static=xx3, mode="sim", 
                         ip_dispersion=0.5, case_records=NULL,
                         tr1=-5.78, tr2=0, tr3=0, tr4=-0.09, tr5=-1.01,
                         gr1=-1.465935, gr2=0, gr3=0, gr4=0.00265, gr5=-0.86,
                         floor_times=FALSE, nreps=100)

ressim <- get_est(static=xx3, dynamic=simo, method="admb",
                  gr_intercept="yes", gr_terms=c("season", "CaseType"),
                  limit_staff_to_one_day=FALSE)

summary(ressim$fit$admb)

##

simo <- get_data_wrapper(static=xx3, mode="sim", 
                         ip_dispersion=0.5, case_records=NULL,
                         tr1=-5.78, tr2=1.912, tr3=0.484, tr4=-0.09, tr5=-1.01,
                         gr1=-1.465935, gr2=0.98, gr3=0.008, gr4=0.00265, gr5=-0.86,
                         floor_times=FALSE, nreps=10)

ressim <- get_est(static=xx3, dynamic=simo, method="admb",
                  gr_intercept="yes", gr_terms=c("set1"),
                  limit_staff_to_one_day=FALSE)

summary(ressim$fit$admb)

##

simo <- get_data_wrapper(static=xx3, mode="sim", 
                         ip_dispersion=0.5, case_records=NULL,
                         tr1=-5.78, tr2=1.912, tr3=0.484, tr4=-0.09, tr5=-1.01,
                         gr1=-1.465935, gr2=0.98, gr3=0.008, gr4=0.00265, gr5=-0.86,
                         floor_times=TRUE, nreps=1)

ressimf <- get_est(static=xx3, dynamic=simo, method="admb",
                  gr_intercept="yes", gr_terms=c("set1"),
                  limit_staff_to_one_day=FALSE)



summary(ressimf$fit$admb)

#

# Batch-time

library(BatchExperiments)

rm(reg)
system("rm -r simReg-files")

reg <- makeExperimentRegistry(id="simReg",
                              packages=c("EpiGLM", "plyr"))
repls <- 10

nn <- Sys.info()["nodename"]
if (nn == "phylocluster.ccbb.utexas.edu"){
  resources <- list(queue="wilke")
  chunk.size <- ceiling(18 * repls / 24)
} else {
  resources <- list()
  chunk.size <- 1
}

addProblem(reg, id="prob", static=xx3, dynamic=get_data_wrapper)

prob.design <- makeDesign("prob", exhaustive=list(floor_times=c(FALSE, TRUE),
                                    nreps=1, mode="sim", ip_dispersion=0.5,
                                    tr1=-5.78, tr2=1.912, tr3=0.484, tr4=-0.09,
                                    tr5=-1.01, gr1=-1.465935, gr2=0.98,
                                    gr3=0.008, gr4=0.00265, gr5=-0.86,
                                    maxinftime=c("none", "limit2")))

addAlgorithm(reg, id="est", fun=get_est)
alg.design <- makeDesign("est", exhaustive=list(gr_intercept="yes",
                                  gr_terms="set1", altfits=TRUE,
                                  limit_staff_to_one_day=c(FALSE, TRUE)))

addExperiments(reg, prob.designs=prob.design,
               algo.designs=alg.design, repls=repls)

addExperiments(reg, prob.designs=prob.design,
               algo.designs=alg.design, repls=20, skip.defined=TRUE)


summarizeExperiments(reg)
submitJobs(reg, resources=resources)
 showStatus(reg)

resL <- reduceResultsList(reg)
fit2ll <- sapply(resL, function(x) {b <- x$fit2$admb$loglik; if(!is.null(b)) b else NA})
fit1ll <- sapply(resL, function(x) {b <- x$fit$admb$loglik; if(!is.null(b)) b else NA})
fitcanll <- sapply(resL, function(x) {b <- x$fitcan$admb$loglik; if(!is.null(b)) b else NA})


resdf <- reduceResultsExperiments(reg, fun=regest2df)

is_covered <- function(x, y, z) x < y && y < z
foo <- names(resdf)[grep("hat_c[tg]r\\.[1-5]$", names(resdf))]
for(bar in foo){
  y <- resdf[, bar]
  l <- paste(bar, "lower", sep="_")
  x <- resdf[, l]
  u <- paste(bar, "upper", sep="_")
  z <- resdf[, u]  
  ic <- paste(bar, "is_covered", sep="_")
  resdf[, ic] <- mapply(is_covered, x, y, z)  
}

test <- !is.na(resdf[, ic])

res_tab <- ddply(resdf[test, ], c("prob", "floor_times", "maxinftime",
                                  "limit_staff_to_one_day"),
                 summarize,
                 bias.ctr1=mean(hat_ctr.1 - tr1),
                 bias.ctr2=mean(hat_ctr.2 - tr2),
                 bias.ctr3=mean(hat_ctr.3 - tr3),
                 bias.ctr4=mean(hat_ctr.4 - tr4),
                 bias.ctr5=mean(hat_ctr.5 - tr5),
                 bias.cgr1=mean(hat_cgr.1 - gr1),
                 bias.cgr2=mean(hat_cgr.2 - gr2),
                 bias.cgr3=mean(hat_cgr.3 - gr3),
                 bias.cgr4=mean(hat_cgr.4 - gr4),
                 bias.cgr5=mean(hat_cgr.5 - gr5),
                 cov.ctr1=mean(hat_ctr.1_is_covered),
                 cov.ctr2=mean(hat_ctr.2_is_covered),
                 cov.ctr3=mean(hat_ctr.3_is_covered),
                 cov.ctr4=mean(hat_ctr.4_is_covered),
                 cov.ctr5=mean(hat_ctr.5_is_covered),
                 cov.cgr1=mean(hat_cgr.1_is_covered),
                 cov.cgr2=mean(hat_cgr.2_is_covered),
                 cov.cgr3=mean(hat_cgr.3_is_covered),
                 cov.cgr4=mean(hat_cgr.4_is_covered),
                 cov.cgr5=mean(hat_cgr.5_is_covered))                 

##



simo <- get_data_wrapper(job=list(id=8), static=xx3, nreps=3, mode="sim",
                         ip_dispersion=0.5, tr1=-5.78, tr2=1.912, tr3=0.484,
                         tr4=-0.09, tr5=-1.01, gr1=-1.465935, gr2=0.98,
                         gr3=0.008, gr4=0.00265, gr5=-0.86, maxinftime="limit2")
res <- get_est(job=list(id=8), static=xx3, dynamic=simo, gr_intercept="yes",
               gr_terms="set1", limit_staff_to_one_day=FALSE)
##
addExperiments(reg, prob.designs=prob2.design,
               algo.designs=alg.design, repls=repls)


simo <- get_data_wrapper(job=list(id=8), static=xx4, nreps=1, mode="sim",
                         ip_dispersion=0.5, tr1=-5.78, tr2=1.912, tr3=0,
                         tr4=0, tr5=0, gr1=-1.465935, gr2=0.98,
                         gr3=0, gr4=0, gr5=0)
res <- get_est(job=list(id=8), static=xx3, dynamic=simo, gr_intercept="yes",
               gr_terms="facility", limit_staff_to_one_day=FALSE)
