library(AvonData)
library(lubridate)
library(plyr)
library(EpiGLM)

data(AvonOutbreaks)
data(caseRecordsFull)
xx <- caseRecordsFull

notes <- list()

## subset to cases attributed in whole or in part to norovirus
xx <- xx[grep("norovirus", xx$Organism),]

## Correct putative data-entry errors
ind <- which(mapply(function(x, y) x==76 & y=="2002-06-10",
                    xx$OutbreakNumber, xx$OnsetDate))
xx$OnsetDate[ind] <- "2002-10-06"
ind <- which(mapply(function(x, y) x==76 & y=="2002-04-10",
                    xx$OutbreakNumber, xx$OnsetDate))
xx$OnsetDate[ind] <- xx$X1stSymptomFreeDate[ind] <- "2002-10-04"
ind <- which(mapply(function(x, y) x==76 & y=="2002-11-15",
                    xx$OutbreakNumber, xx$X1stSymptomFreeDate))
xx$X1stSymptomFreeDate[ind] <- "2002-10-15"

## Remove dates that describe an impossible symptomatic
badDates <- with(xx, which(OnsetDate > X1stSymptomFreeDate))
xx$X1stSymptomFreeDate[badDates] <- NA
xx$OnsetDate[badDates] <- NA
notes$n_bad_dates <- length(badDates)
rm(badDates)

# Remove dates where CaseCode is missing
missing_case_code <- is.na(xx$CaseCode)
xx <- subset(xx, !missing_case_code)
notes$n_missing_case_code <- sum(missing_case_code)
rm(missing_case_code)

# Make less specific case code
renamer <- function(x) {
  ret <- switch(x, "Hosp staff"="staff", "NH staff"="staff",
                "Patient"="patient", "Resident"="patient")
  if(is.null(ret)) ret <- NA
  ret
}
renamer <- Vectorize(renamer)
xx$CaseType <- renamer(xx$CaseCode)
rm(renamer)
xx$outbreak_code <- with(xx, paste(OutbreakNumber, CaseType, sep="_"))

# Tabulate missing data 
xx$OnsetFreeingRep <- with(xx,interaction(!is.na(OnsetDate),
                                          !is.na(X1stSymptomFreeDate)))
xx2 <- ddply(xx, .(OutbreakNumber, CaseType), summarise,
             outbreak_code=outbreak_code[1],
             OBSize=length(OutbreakNumber),
             bothDates=sum(OnsetFreeingRep=="TRUE.TRUE"),
             onsetOnly=sum(OnsetFreeingRep=="TRUE.FALSE"),
             freeingOnly=sum(OnsetFreeingRep=="FALSE.TRUE"),
             noDates=sum(OnsetFreeingRep=="FALSE.FALSE"),
             initialInf=sum(OnsetDate == min(OnsetDate, na.rm=TRUE),
               na.rm=TRUE))
xx2$propFreeImp <- with(xx2, onsetOnly/(onsetOnly + bothDates))
xx2$propOnsetNA <- with(xx2, (noDates + freeingOnly)/(onsetOnly + bothDates))

## Subset according to tolerance for missing data
maxPONA <- 0.07
maxPRNA <- 0.55
xx2 <- subset(xx2, propFreeImp<=maxPRNA & propOnsetNA <= maxPONA)
xx <- subset(xx, outbreak_code %in% xx2$outbreak_code)

## Hack to include all types of susceptibles in all outbreaks in model
## matrix. Without doing this, simulator would sometimes simulate
## outbreaks with only the types observed to be infected. That does
## not agree with our model, which posits that the number of initial
## susceptibles should be a function of predictor variables. 
foo <- expand.grid(OutbreakNumber=unique(xx2$OutbreakNumber),
                   CaseType=unique(xx2$CaseType), stringsAsFactors=FALSE)
foo$outbreak_code <- with(foo, paste(OutbreakNumber, CaseType, sep="_"))
complement <- setdiff(foo$outbreak_code, xx2$outbreak_code)
foo <- subset(foo, foo$outbreak_code %in% complement)
foo <- cbind(foo, initialInf=0)
xx2 <- rbind.fill(xx2, foo)

# Get predictor vars
starts <- ddply(xx, "OutbreakNumber", summarize,
                month=min(month(OnsetDate), na.rm=TRUE))
starts$season <- NA
starts$season[starts$month %in% c(10:12, 1:3)] <- "fall-winter"
starts$season[starts$month %in% 4:9] <- "spring-summer"
key <- match(xx2$OutbreakNumber, starts$OutbreakNumber)
xx2$season <- starts$season[key]
rm(starts)
key <- match(xx2$OutbreakNumber, AvonOutbreaks$OutbreakNumber)
xx2$facility <- AvonOutbreaks$HomeHos[key]
AvonOutbreaks$cap <- with(AvonOutbreaks,
                          ifelse(is.na(UnitBeds), MaxResidents,
                                 UnitBeds))
AvonOutbreaks$medians <- with(AvonOutbreaks,
                              ave(cap, HomeHos,
                                  FUN=function(x) median(x, na.rm=TRUE)))
AvonOutbreaks$is_small <- with(AvonOutbreaks, cap < medians)
xx2$is_small <- AvonOutbreaks$is_small[key]
rm(key)

## Take a few notes

notes$noutbreaks <- length(unique(xx2$OutbreakNumber))
notes$ncases <- nrow(xx)

## Fitting

xx2$scale <- 1
xx2$CaseType <- as.factor(xx2$CaseType)
xx2$season <- as.factor(xx2$season)

xx4 <- xx2
xx4$propFreeImp[is.na(xx4$propFreeImp)] <- 0.078
xx4$propOnsetNA[is.na(xx4$propOnsetNA)] <- 0.0014

##

paso <- get_data(static=xx4, mode="passthrough", case_records=xx)

estData <- get_est(job=list(id=-1), static=xx4, dynamic=paso, method="admb",
                   gr_intercept="yes", gr_terms=c("set1"),
                   limit_staff_to_one_day=TRUE)
estData$coefnames
summary(estData$fit$admb)
notes

estData$ip_fit$m$data <- NULL
save(estData, file="estData.RData")
save(xx4, file="staticData.RData")

estDataDat <- get_est(job=list(id=-1), static=xx4, dynamic=paso,
                      method="admb", gr_intercept="yes",
                      gr_terms=c("set1"), returndat=TRUE,
                      limit_staff_to_one_day=TRUE)

save(estDataDat,file=file.path("estDataDat.RData"))
save(paso, file=file.path("..", "..", "AvonData", "data",
             "case-records-filtered.RData"))
q(save="no")
