smidge <- 1e-8

del_rows <- function(df, frac){
  stopifnot(all(frac <= 1, frac >= 0))
  N <- nrow(df)
  n <- ceiling(N * (1 - frac))
  row_numbers <- sample.int(N, n)
  df[row_numbers,]
}    

na_recov <- function(df, frac){
  stopifnot(all(frac <= 1, frac >= 0))
  N <- nrow(df)
  n <- floor(N * frac)
  row_numbers <- sample.int(N, n)
  df$recovered_start[row_numbers] <- NA
  df
}    

mod_records <- function(simout, obc, frac, FUN){
  tmpf <- function(x){
    ret <- list()
    ind <- which(x$outbreak_code[1] == obc)
    thefrac <- frac[ind]
    FUN(df=x, frac=thefrac)    
  }
  ret <- dlply(simout, c("outbreak_code"), tmpf)
  rbind.fill(ret)
}

get_data_wrapper <- function(job, static, ..., tr1=1, tr2=0, tr3=0, tr4=0,
                             tr5=0, gr1=5+tr1, gr2=0, gr3=0, gr4=0, gr5=0,
                             maxinftime="none"){
  tr <- c(tr1, tr2, tr3, tr4, tr5)
  gr <- c(gr1, gr2, gr3, gr4, gr5)
  ip <- c(3, 0, 0, 0, 0)
  if(maxinftime == "limit2"){
    maxinftime <- c(-1, 1)
  } else if (maxinftime == "none"){
    maxinftime <- c(-1, -1)
  }
  get_data(static=static, jump=job$id,
           trans_rate_model="~facility + is_small + season + CaseType",
           trans_rate_coefs=tr, ip_coefs=ip, growth_rate_coefs=gr,
           maxinftime=maxinftime, ...)
}

get_data_wrapper2 <- function(job, static, ..., maxinftime="limit2"){
  data(estData)
  tr <- estData$fit$admb$coeflist$ctr
  gr <- estData$fit$admb$coeflist$cgr
  ip <- coef(estData$ip_fit$m)
  disp <- estData$ip_fit$disp
  if(maxinftime == "limit2"){
    maxinftime <- c(-1, 1)
  } else if (maxinftime == "none"){
    maxinftime <- c(-1, -1)
  }
  get_data(static=static, jump=job$id, ip_dispersion=disp,
           trans_rate_model="~facility + is_small + season + CaseType",
           trans_rate_coefs=tr, ip_coefs=ip, growth_rate_coefs=gr,
           maxinftime=maxinftime, ...)
}


get_data <- function(static, jump=0, mode=c("simulate", "passthrough"),
                     trans_rate_model="~facility", trans_rate_coefs, 
                     ip_model=trans_rate_model,
                     ip_coefs=trans_rate_coefs,
                     ip_dispersion=1, maxinftime,
                     growth_rate_model=trans_rate_model,
                     growth_rate_coefs=10*trans_rate_coefs,
                     nreps=1, latent_period=1, floor_times=TRUE,
                     case_records, logtr=TRUE, loggr=logtr, logip=logtr, ...){
  Mtr <- model.matrix(as.formula(trans_rate_model), data=static)
  Mip <- model.matrix(as.formula(ip_model), data=static)
  Mgr <- model.matrix(as.formula(growth_rate_model), data=static)
  valid_rows <- intersect(rownames(Mtr), rownames(Mip))
  valid_rows <- intersect(valid_rows, rownames(Mgr))
  data <- static[rownames(static) %in% valid_rows, ]
  mode <- match.arg(mode)
  if (mode == "passthrough"){
    sel <- unique(data$outbreak_code)
    case_records <- case_records[case_records$outbreak_code %in% sel, ]
    case_records <- case_records[!is.na(case_records$OnsetDate), ]    
    foo <- case_records[,c("OutbreakNumber", "CaseType",
                           "outbreak_code")]
    times <- case_records[, c("OnsetDate", "X1stSymptomFreeDate")]
    foo <- cbind(foo, data.matrix(times))
    # So all outbreaks start at 0
    mins <- ave(foo$OnsetDate, foo$OutbreakNumber, FUN=min)
    bar <- foo[, c("OnsetDate", "X1stSymptomFreeDate")] - mins
    foo[, c("OnsetDate", "X1stSymptomFreeDate")] <- bar
    # So headers match simulation output
    foo$CaseType <- factor(foo$CaseType)
    ind <- match("OnsetDate", colnames(foo))
    colnames(foo)[ind] <- "infective_start"
    ind <- match("X1stSymptomFreeDate", colnames(foo))
    colnames(foo)[ind] <- "recovered_start"
    foo$outbreak_id <- foo$OutbreakNumber
    case_records <- foo
  } else if (mode == "simulate"){
    if (!logtr){
      ytr <- Mtr %*% trans_rate_coefs
    } else {
      ytr <- exp(Mtr %*% trans_rate_coefs)
    }
    if (!logip){
      yip <- Mip %*% ip_coefs
    } else {
      yip <- exp(Mip %*% ip_coefs)
    }
    if (!loggr){
      ygr <- Mgr %*% growth_rate_coefs
    } else {
      ygr <- exp(Mgr %*% growth_rate_coefs)
    }
    X0 <- ygr / ytr
    obn <- data$OutbreakNumber
    fobn <- factor(obn)
    irow <- as.integer(fobn)
    ctf <- factor(data$CaseType)
    ctf_ints <- unique(as.integer(ctf))
    ctf_levels <- levels(ctf)
    icol <- as.integer(ctf)
    m <- ic <- s <- p <- l <- b <- a <- matrix(0, max(irow), max(icol))
    if (missing(maxinftime)){
      m[,] <- -1L
    } else {
      colno <- sort(unique(icol))
      stopifnot(length(maxinftime) == length(colno))
      for(j in colno){
        m[,j] <- maxinftime[j]
      }
    }
    for(k in seq_len(nrow(data))){
      i <- irow[k]; j <- icol[k]
      ic[i, j] <- data$initialInf[k]
      s[i, j] <- X0[k]
      p[i, j] <- ytr[k]
      l[i, j] <- latent_period
      b[i, j] <- yip[k] * ip_dispersion
      a[i, j] <- 1 / ip_dispersion
    }
    prep_sim()
    prefix <- paste("simo_", jump, "_", sep="")
    simout <- run_sim(initial_cases=ic, susceptibles=s,
                       proportionalities=p, latent_periods=l, betas=b,
                      alphas=a, nreps=nreps, jump=jump, maxinftime=m,
                      typecode=ctf_ints, obn=as.integer(levels(fobn)),
                      workdir=tempfile(prefix))
    simout$OutbreakNumber <- simout$obn
    simout$CaseType <- factor(simout$type, levels=ctf_ints,
                              labels=ctf_levels)
    simout$outbreak_code <- with(simout,
                                 paste(OutbreakNumber, CaseType, sep="_"))
    simout <- mod_records(simout, obc=data$outbreak_code,
                          frac=data$propOnsetNA, FUN=del_rows)
    simout <- mod_records(simout, obc=data$outbreak_code,
                          frac=data$propFreeImp, FUN=na_recov)
    if(floor_times){
      timevars <- paste(c("latent", "infective", "recovered"),
                        "start", sep="_")
      tmpf <- function(piece){
        piece[, timevars] <- piece[, timevars] + runif(1L)
        piece
      }
      simout <- ddply(simout, "outbreak_id", tmpf)
      foo <- floor(simout[, timevars])
      foo <- apply(foo, 2, as.integer)
      simout[, timevars] <- foo
      simout
    }
    case_records <- simout    
  }
  case_records$symp_obs <- with(case_records,
                                recovered_start - infective_start)
  if (any(is.na(case_records$symp_obs))){
    case_records$med <- with(case_records,
                             ave(symp_obs, outbreak_code,
                                 FUN=function(x) median(x,na.rm=TRUE)))
    case_records$symp_imp <- with(case_records,
                                  ifelse(is.na(symp_obs),
                                         med, symp_obs))
    case_records$recovered_original <- case_records$recovered_start
    case_records$recovered_start <- with(case_records,
                                         infective_start + symp_imp)
  }
  case_records
}
