#
# Model description file
#


#
# Start with the most basic model I can imagine.
# Subjects have a specific tolerance for asynchrony
# if the noisily-encoded asynchrony is within their tolerance, they report SYNC, 
# otherwise they say ASYNC.
#

# define some ranges for variables, units are ms
SENSORY_NOISE_RANGE = c(1, 500)
THRESHOLD_RANGE = c(-750, 750)

# the predict function requires subject parameters and
# some physical asynchronies
predict.symmetric_threshold_model = function(pars, asynchronies) {
    
    # the clip_variable function (defined at the bottom of this file) ensures parameters
    # don't go run off to unrealistic values. Only change these after thinking it through
    threshold = clip_variable(pars[1], c(-500,500))
    sensory_noise = clip_variable(pars[2], c(1, 500))
    
    # because there is a single threshold, the model doesn't care about the direction of the
    # asynchrony -- we know empirically this isn't a good model, but it's worth seeing how it does
    
    # what is the probability that a noisily encoded asynchrony will be within the subjects tolerance?
    
    # less than the +threshold
    upper = pnorm(q=threshold, mean=asynchronies, sd=sensory_noise, lower.tail = TRUE)
    
    # we need to subtract off the probability that it is less than the -threshold
    lower = pnorm(q=-threshold, mean=asynchronies, sd=sensory_noise, lower.tail = TRUE)
    
    upper - lower
}

#
# Basic cost function
# y and yhat should be in the same units (e.g., both proportions)
#
cost.sse = function(y, yhat) {
    sum((y-yhat)^2)
}

#
# Optim function using cost.sse as it's objective
#
# requires:
# y: input data as P(Sync)
# asynchronies: tested asynchronies
# initial_pars: just take a guess (or run this several times with different initial conditions)
# pred.model: which function you want to use to predict synchrony, defaults to predict.single_threshold_model
optim.model_sse = function(y, asynchronies, initial_pars, pred.model=predict.symmetric_threshold_model) {
    
    # a helper function that will be called by optim (see below) repeatedly with new values of pars
    # NB: This could also be an anonymous function given directly to optim
    optim_function = function(pars) {
        yhat = pred.model(pars, asynchronies)
        cost.sse(y, yhat)
    }
    
    # calls optim_function repeatedly until the value of cost.sse stops decreasing
    optim(initial_pars, optim_function)
}





#
# It stands to reason that subjects might have a different threshold for A before V than
# for V before A. This model is identical to a symmetric threshold model with location
# shift.
#
predict.asymmetric_threshold_model = function(pars, asynchronies) {
    
    # make sure the lower threshold is lower than the upper, and not arbitrarily low
    threshold_low = clip_variable(pars[1], c(THRESHOLD_RANGE[1], pars[2]))
    
    # make sure the upper threshold is above than the lower, and not arbitrarily high
    threshold_high = clip_variable(pars[2], c(threshold_low, THRESHOLD_RANGE[2]))
    
    # don't let noise be less than 1ms, or greater than twice the larger asynch
    sensory_noise = clip_variable(pars[3], SENSORY_NOISE_RANGE)
    
    # prob less than threshold_high
    upper = pnorm(q=threshold_high, mean=asynchronies, sd=sensory_noise, lower.tail = TRUE)
    
    # we need to subtract off the probability that it is less than threshold_low
    lower = pnorm(q=threshold_low, mean=asynchronies, sd=sensory_noise, lower.tail = TRUE)
    
    upper - lower
}


BIAS_RANGE = c(-1, 1)

# dt model with bias
predict.asymmetric_threshold_bias_model = function(pars, asynchronies) {
    bias = clip_variable(pars[4], BIAS_RANGE)
    
    p_at = predict.asymmetric_threshold_model(pars, asynchronies)
    
    pred = p_at + bias
    
    # don't predict outside the realm of possibility
    pred[pred < 0] = 0
    pred[pred > 1] = 1
    
    pred
}


#
# take into account the number of parameters
# rmse / df.resid
# cf. Massaro 1998
cost.rmse_corr = function (y, yhat, n.par) {
    n_conditions = length(y)
    
    sse = sum( (y-yhat)**2 )
    
    rmse_corrected = sqrt( sse/n_conditions ) * ( n_conditions/(n_conditions - n.par) )
    
    return (rmse_corrected)
}

#
# binomial negative log-likelihood cost function
#

# y should be count, yhat should be a probability
# we specify a limit for yhat because we need to take the log
cost.negative_binom_LL = function(y, p_yhat, n, p_limits=c(1e-3, 1 - 1e-3)) {
    
    p_yhat = clip_variable(p_yhat, p_limits)
    
    q_yhat = 1-p_yhat
    
    s = y * log(p_yhat)
    f = (n-y) * log(q_yhat)
    
    -sum(s + f)
}

# 
# scaled nLL that takes into account n.par and n
cost.BIC = function(y, p_yhat, n, n.par, p_limits=c(1e-3, 1-1e-3)) {
    nLL = cost.negative_binom_LL(y, p_yhat, n, p_limits)
    
    2 * nLL + log(sum(n))*n.par
}



# exercise for the reader: how would you optimize negLL instead of cost.sse?
# optim.model_nLL = function(y, asynchronies, n, initial_pars, model) {
#     
# }



#
# PLOTTING FUNCTIONS
#



# simple line plot with some handy defaults
# draw_peak (default true) adds a vertical line at the asynchrony corresponding to max yhat
#   if there are multiple maxima, take the mean of the maxima
plot.sj = function(yhat, add=FALSE, col='black', type='l', lwd=2, pch=16, draw_peak=FALSE, peak_lty=2, peak_col=col, ...) {
    if(!add) {
        plot(asynchronies, yhat, type=type, col=col, las=1, pch=pch,
             ylim=0:1, ylab='P(Sync)', xlab='Asynchrony (ms)', lwd=lwd, ...)
    } else {
        lines(asynchronies, yhat, type=type, col=col, pch=pch, lwd=lwd, ...)
    }
    
    if(draw_peak) {
        peak = mean(asynchronies[which(yhat == max(yhat))])
        abline(v=peak, col=peak_col, lty=peak_lty)
    }
}


#
# Helper functions to make the code shorter
#
p.stm = function(pars) predict.symmetric_threshold_model(pars, asynchronies)

get_pars = function(fit) fit$par

# some util functions
clip_variable = function(x, lim) {
    l = min(lim)
    u = max(lim)
    
    x[x<l] = l
    x[x>u] = u
    
    x
}



#
# Other models that have guessing terms
#

GUESSING_RANGE = c(0.001, 0.75)

# single threshold model, but now with a guessing term / lapse rate
predict.symmetric_threshold_guessing_model = function(pars, asynchronies) {
    
    # this is the new guessing parameter
    # I'm putting an upper limit at 75% guessing
    # If subjects are near this level, the data are probably unusable anyway
    guess_rate = clip_variable(pars[3], GUESSING_RANGE)
    
    # get predictions from the single threshold model
    p_st = predict.symmetric_threshold_model(pars, asynchronies)
    
    # the final probabilities are a mixture of NOT guessing and guessing
    # NB: Guessing has the effect of pulling the tails off the floor and the peak off the ceiling
    # NB: this is distinct from a bias term, that would shift the function up or down
    (1-guess_rate)*p_st + guess_rate * 0.5
}


# double threshold model with guessing
predict.asymmetric_threshold_guessing_model = function(pars, asynchronies) {
    guess_rate = clip_variable(pars[4], GUESSING_RANGE)
    
    p_at = predict.asymmetric_threshold_model(pars, asynchronies)
    
    # the final probabilities are a mixture of NOT guessing and guessing
    (1-guess_rate)*p_at + guess_rate * 0.5
}
