library(plyr)
library(reshape2)
library(ggplot2)
library(AvonData)

load("estBoot-reduction.RData")

predNames <- c('facility', 'is_small', 'season', 'CaseType')
predictors <- xx4[, predNames]
dups <- duplicated(predictors)
predUniq <- predictors[!dups, ]
containsMissing <- function(x) any(is.na(x))
nas <- apply(predUniq, 1, containsMissing)
predClean <- predUniq[!nas, ]

mmatUnique <- model.matrix(~1 + facility + is_small + season + CaseType,
                           data=predClean)

tmpf <- function(x) paste(x, 1:5, sep=".")
cNames <- lapply(c(tr="hat_ctr", gr="hat_cgr"), tmpf)

tmpf <- function(x) {
  ests <- reduction[, x]
  ests <- data.matrix(ests)
  mmatUnique %*% t(ests)
}
preds <- lapply(cNames, tmpf)
preds$X0 <- preds$gr - preds$tr

foo <- cbind(exp(preds$X0), predClean)
baz <- melt(foo, id=predNames)
dat <- ddply(baz, c('facility', 'is_small', 'CaseType'), summarize,
             lower=quantile(value, probs=.25, na.rm=TRUE),
             upper=quantile(value, probs=.95, na.rm=TRUE))

spruceLabels <- function(df){
  df$facility <- factor(df$facility, levels=2:1, labels=c("LTCF", "Hospital"))
  df$is_small <- factor(df$is_small, levels=c(TRUE, FALSE),
                         labels=c('Small', 'Large'))
  df$CaseType <- factor(df$CaseType, levels=c('patient', 'staff'),
                         labels=c('Patient', 'Staff'))
  df
}
dat <- spruceLabels(dat)

## Get outside data on unit sizes for validation
data(AvonOutbreaks)
key <- match(xx4$OutbreakNumber, AvonOutbreaks$OutbreakNumber)
popNames <- c("UnitBeds", "UnitStaff", "MaxResidents", "CareStaff", "OutbreakNumber",
              "StaffAffected", "ResidentsAffected")
popData <- cbind(xx4[, predNames], AvonOutbreaks[key, popNames])
popData <- spruceLabels(popData)

popData$staffDat <- NA
test <- popData$facility=='Hospital' & popData$CaseType =='Staff'
popData$valDat[test] <- popData$UnitStaff[test]
test <- popData$facility=='Hospital' & popData$CaseType =='Patient'
popData$valDat[test] <- popData$UnitBeds[test]
test <- popData$facility=='LTCF' & popData$CaseType =='Staff'
popData$valDat[test] <- popData$CareStaff[test]
test <- popData$facility=='LTCF' & popData$CaseType =='Patient'
popData$valDat[test] <- popData$MaxResidents[test]
popData <- popData[!is.na(popData$valDat),]

g <- ggplot(dat, aes(x=CaseType))
g <- g + geom_linerange(aes(ymin=lower, ymax=upper), size=30, colour='gray')
g <- g + facet_grid(facility ~ is_small)
g <- g + scale_y_log10()

g <- g + geom_jitter(data=popData, aes(x=CaseType, y=valDat),
                      position=position_jitter(height=0, width=0.25))

g <- g + xlab('Case type') + ylab('Initial susceptibles')

g <- g + theme_bw(base_size=12)
g <- g + theme(panel.grid.minor=element_blank())
g <- g + theme(panel.grid.major=element_blank())
g <- g + theme(panel.background=element_blank())
g <- g + theme(strip.background=element_blank())
g <- g + theme(panel.border=element_blank())
g <- g + theme(axis.line=element_line())
g <- g + theme(strip.text.y=element_text())

ggsave(g, filename="predicted-X0-vs-unit-sizes.eps",
       pointsize=12, width=5.5, height=5.5)

tab <- ddply(popData, c('facility', 'season', 'is_small'), summarize,
             nOutbreaks=length(unique(OutbreakNumber)))

cleanf <- function(x){
    ## source: http://stackoverflow.com/q/5465314/548147
    oldx <- c(FALSE, x[-1]==x[-length(x)])
      ## is the value equal to the previous?
      res <- x
      res[oldx] <- NA
      return(res)
  }
tab$facility <- cleanf(tab$facility)
tab$facility <- factor(tab$facility,
                       levels=c('LTCF', 'Hospital'),
                       labels=c('LTCF', 'hospital'))
tab$season <- cleanf(tab$season)
tab$is_small <- factor(tab$is_small,
                       levels=c('Small', 'Large'),
                       labels=c('small', 'large'))

colnames(tab) <- c('facility', 'season', 'size class', '# outbreaks')
xtab <- xtable::xtable(tab)
print(xtab, include.rownames=FALSE, file='tab_nOutbreaks.tex')
