#
# Fitting regression models to data
# CNS Short Course
#
# John Magnotti
# magnotti@bcm.edu
#
# see accompanying ppt for context
#

# set some output options
par(pch=19, las=1, col='gray30', tcl=-0.3, bty='n')

procedure_colors <- c('A' = 'dodgerblue3', 'B' = 'orange')

hist <- function(x, ...) {
    graphics::hist(x, border='gray40', col='gray60', ...)
}

error_bars <-function(x, y_with_err, code = 0, ...) {
    arrows(x0 = x, y0 = y_with_err[,'lwr'], y1 = y_with_err[,'upr'], code = code)
}


# load the data
outcome_data = read.csv('outcomes.csv')

# attach the data so that the variables can be accessed more easily
attach(outcome_data)

# look at the outcome variable
hist(outcome, main = 'Distribution of Outcomes\n2014-2018')

# do people get better?
plot(pre_test, outcome, col=procedure_colors[procedure], ylim=c(0,100), xlim=c(0,100), bty='n')
abline(0,1)

# Fit the model
m1 = lm(outcome ~ pre_test + PreOpProt + PreOpRisk + procedure)

# An alternative model adding in other parameters
m2 = update(m1, ~ . + age + smoker)

# how well does the first model fit the data?
summary(m1)

# compare the two models (put the more complex model second)
anova(m1, m2)

# plot the predicted scores vs. the actual scores
plot(predict(m2), outcome, col=procedure_colors[procedure], xlim=c(0,100), ylim=c(0,100))
abline(0,1, lty=2, lwd=2)

# get coefficients and fit statistics for model 2
summary(m2)

# what about an interaction with age?
m3 = update(m2, ~ . + age:procedure)
summary(m3)


# plot improvement across age
plot(x=age, y=improvement, col=procedure_colors[procedure])

# get a fit line for each procedure by age
fitted = by(outcome_data, procedure, lm, formula=improvement ~ age)

# draw the fit line for each
for (i in 1:2) abline(fitted[[i]], col=procedure_colors[i], lwd=2, lty=2)



# predict outcomes for some new patients
new_patients = read.csv('new_patients.csv')

# using prediction intervals, not "confidence" intervals, because we're interested in individuals, not the mean of individuals
predicted_outcome = predict(m3, newdata=new_patients, interval='prediction')

# look at predictions
plot(new_patients$pre_test, predicted_outcome[,'fit'], ylim=c(0,100), xlim=c(0,100),
     col=ifelse(new_patients$pre_test - predicted_outcome[,'fit'] > 0, 'orangered', 'gray50'))
error_bars(new_patients$pre_test, predicted_outcome)
abline(0,1)

# which participants are predicted to have outcome < pre_test
ind = which(predicted_outcome[,'fit'] < new_patients$pre_test)

#display the records
new_patients[ind,]

# what would be prediction if they did procedure A?
test = new_patients[ind,]
test$procedure = 'A'
new_fits = predict(m3, test)

#Compare predictions if procedure is changed
plot(new_patients$pre_test, predicted_outcome[,'fit'],
     col='gray70', ylim=c(0,100), xlim=c(10,70),
     xlab='Pre-surgery score', ylab='Predicted Outcome')
abline(0,1)
points(test$pre_test, new_fits, col='orangered')
arrows(x0=test$pre_test, y0=predicted_outcome[ind,'fit'], y1=new_fits,
       col='orangered', lwd=2)

# large outcome improvement predicted for these patients
new_fits - predicted_outcome[ind,'fit']

