source("basic_helper.R")

#
#
# Core functions for building and testing McGurk Variable Encoding Threshold model
#
#

# # # # #	Deterministic Encoding	# # # # #

#
# For the deterministic models, we just check that the disparityicacy is at or above threshold
predict.det = function(object, disparity, ...) {
	return(as.integer(disparity >= object$threshold))
}

#
# Subjects have fixed threshold at "0"
# Rank the movies based on pF then shift so that pF >= .5 implies disparity >= 0
optim.d0 = function(pFmat) {
	threshold = 0

	disparity = as.numeric(rank(colMeans(pFmat, na.rm = T)))
	shift = min(disparity[which(colMeans(pFmat, na.rm = T) >= 0.5)])
	disparity = disparity - shift

	res = list()
	res$subjs = lapply(1:nrow(pFmat), function(i) {
		a = list(threshold = threshold, rmse = Inf, y = pFmat[i, ])
		class(a) = "det"
		return(a)
	})

	res$disparities = disparity

	for (i in 1:nrow(pFmat)) {
		res$subjs[[i]]$rmse = model_fit(root_mean_squared_error, res$subj[[i]], res$disparities)
	}

	res$rmse.total = rmse_total(res$subjs)

	return(res)
}

#
# Subjects have independent thresholds
# Rank the movies, then find subjects thresholds based on pF > .5 for individual subject
optim.d1 = function(pFmat) {
	disparity = rank(colMeans(pFmat, na.rm = T))

	res = list()
	res$subjs = parRapply(cl, pFmat, optim.subj.d1, disparity = disparity)

	res$disparities = disparity

	res$rmse.total = rmse_total(res$subjs)

	return(res)
}

#
# Per subject thresholding. Separate function so we can multi-thread this process
optim.subj.d1 = function(y, disparity) {
	source("mcg_model_functions.R")

	# get all possible thresholds	
	r = c(unique(disparity), max(disparity) + 1)

	# build subjs using all possible thresholds
	subjs = lapply(r, function(threshold) {
		a = list(threshold = threshold, rmse = Inf, y = y)
		class(a) = "det"
		return(a)
	})

	# pick the one with the lowest sse
	best = subjs[[which.min(model_fit(sum_squared_error, subjs, disparity))]]

	# grab the RMSE for the best subject
	best$rmse = model_fit(root_mean_squared_error, best, disparity)

	return(best)
}


# # # # #	Variable Encoding Models	# # # # #

# Specify the range of critical parameters here:
DISPARITY_RANGE = c(0, 2)
THRESHOLD_RANGE = c(0, 2)
SENSORY_NOISE_RANGE = c(1e-3, 1)


#
# subjects have 0 threshold point, fitted noise parameter
optim.p0 = function(pFmat, n.sim = 8 * 4, n.iter=7) {
	return(optim.var_model(pFmat, optim.subj.p0, n.sim, n.iter))
}

# Individual threshold and Sensory Noise
optim.p1 = function(pFmat, n.sim = 8 * 4, n.iter=7) {
	return(optim.var_model(pFmat, optim.subj.p1, n.sim, n.iter))
}

# Convenience function with proper name
optim.mcg = function(pFmat, n.sim = 8 * 4, n.iter=7) {
	return(optim.p1(pFmat, n.sim, n.iter))
}

# All disparityies = mean(DISPARITY_RANGE)
optim.s1 = function(pFmat) {
	res = list()
	disparities = rep(mean(DISPARITY_RANGE), ncol(pFmat))
	subjs = parRapply(cl, pFmat, optim.subj.p1, disparities=disparities, n.sim=250)

	#store the results
	res$subjs = subjs
	res$disparities = disparities
	res$rmse.total = rmse_total(subjs)
	
	return (res)
}

# P_X(McG) = P(X < subj_thresholdoff); X ~ N(movie_disparityicacy, subj_noise)
#
# Note that we clip the threshold \in THRESHOLD_RANGE and sd \in SENSORY_NOISE_RANGE to make sure the fitting procedure
# does lead to arbitrarily extreme values for the parameters.
# This occurs (for instance) when subjects always (never) report a fusion response
#
predict.var = function(object, disparity, ...) {
	object$threshold = clip(object$threshold, range=THRESHOLD_RANGE)
	object$sd = clip(object$sd, range=SENSORY_NOISE_RANGE)

	return(pnorm(object$threshold, disparity, object$sd, lower.tail = TRUE))
}


predict.var_vec = function(par, disparity) {
	return(pnorm(par[1], disparity, par[2], lower.tail = TRUE))
}

#only give ONE disparity
predict.var_mat = function(parmat, disparity) {
	return(pnorm(parmat[, 1], disparity, parmat[, 2]))
}


#
# build up the probabilistic encoding model using n.sim initial conditions
# Returns the best fitting model, model specified by optim.subj_model
# don't use this directly unless you know what you're doing
optim.var_model = function(pFmat, optim.subj_model, n.sim, n.iter, sample.sd=0.05) {

	# we take up the 1-colMeans(...) because LOW disparity ~> HIGH McG
	disp = exp(1 - colMeans(pFmat, na.rm = TRUE)) - 1
	
	#build up some randomized intial conditions that generally accord with the sample ranks
	disparities = t(array(rnorm(ncol(pFmat) * (n.sim-1), mean = disp, sd = sample.sd), c(ncol(pFmat), n.sim-1)))
	disparities[1,] = disp
	

	# wrap the fitting function with a counter and a timer
	fit_function = with_counter(with_timer(fit.var_model), n.sim)

	res = apply(disparities, 1, fit_function, pFmat = pFmat, subj_function = optim.subj_model, n.iter=n.iter)

	res.best = res[[which.min(sapply(res, function(r) r$rmse.total))]]

	return(res.best)
}

get_min = function(interval, subjs, idx, ...) {
	source('mcg_model_functions.R')
	optimize(optim_disparity.var, interval, subjs = subjs, idx = idx)$minimum
}

fit.var_model = function(disparities, pFmat, subj_function, n.iter = 5) {
	res = list()

	subjs = parRapply(cl, pFmat, subj_function, disparities = disparities)

	res$subjs = subjs
	res$disparities = disparities
	res$rmse.total = rmse_total(subjs)

	for (qq in 1:n.iter) {
		disparities = parSapply(cl, seq(disparities), get_min, interval=DISPARITY_RANGE, subjs=subjs)
		subjs = parRapply(cl, pFmat, subj_function, disparities = disparities)

		if (rmse_total(subjs) < res$rmse.total) {
			res$subjs = subjs
			res$disparities = disparities
			res$rmse.total = rmse_total(subjs)
		}
		print(res$rmse.total)
	}
	return(res)
}

#
# get the sse for a given disparityicacy
optim_disparity.var = function(disparity, subjs, idx, ...) {

	# create a new subj list that only contains the data corresponding to idx
	subjs = lapply(subjs, function(subj) {
		subj$y = subj$y[idx]
		return(subj)
	})

	return(sum(model_fit(sum_squared_error, subjs, disparity), na.rm = T))
}

#
# helps get SSE for the two variable-encoding model types
optim_help.var = function(par, subj, disparity) {
	subj$sd = par[1]

	if (length(par) > 1) {
		subj$threshold = par[2]
	}

	return(model_fit(sum_squared_error, subj, disparity))
}

#
# changing SD only
optim.subj.p0 = function(y, disparities) {
	source("mcg_model_functions.R")

	subj = list(threshold = 0, sd = 0, rmse = Inf, y = y)
	class(subj) = "var"

	h = optimize(optim_help.var, SENSORY_NOISE_RANGE, subj = subj, disparity = disparities)

	subj$sd = h$minimum

	# we can't just take h$objective here because we minimize sse and we want RMSE
	# minimizing RMSE would require an extra work each pass
	subj$rmse = model_fit(root_mean_squared_error, subj, disparities)

	return(subj)
}

#
# changing sd and threshold
optim.subj.p1 = function(y, disparities, n.sim = 25) {
	source("mcg_model_functions.R")

	subj = list(threshold = 0, sd = 0, rmse = Inf, y = y)
	class(subj) = "var"

	pars = (cbind(runif(n.sim, SENSORY_NOISE_RANGE[1], SENSORY_NOISE_RANGE[2]), runif(n.sim, THRESHOLD_RANGE[1], THRESHOLD_RANGE[2])))

	res = apply(pars, 1, optim, optim_help.var, subj = subj, disparity = disparities)

	best_res = res[[which.min(sapply(res, function(r) r$value))]]

	subj$sd = best_res$par[1]
	subj$threshold = best_res$par[2]

	# we can't just take best_res$value here because we minimize sse and we want RMSE
	# minimizing RMSE would require extra work each pass
	subj$rmse = model_fit(root_mean_squared_error, subj, disparities)

	return(subj)
}

# # # # #	Model fit assessment functions	# # # # #

#
# easy accessor that includes useful presets for getting a model_fit
get_fit = function(model, npar, n.par.movie = 14) {
	model_fit(rmse.corr, model$subjs, model$disparities, n.par = npar + n.par.movie/length(model$subjs))
}

#
# This gives access to the fit/error functions when all you have is a model(s)
# Because it uses the generic 'predict' function, we can use this function for all of our model types
# The function will also accept a list of models and return an array of results
model_fit = function(fit_function, model, disparity, ...) {
	if (!is.vector(model)) {
		y.hat = predict(model, disparity)
		y = model$y

		idx = !is.na(y)

		return(fit_function(y[idx], y.hat[idx], ...))
	}

	return(sapply(model, function(m) model_fit(fit_function, m, disparity, ...)))
}


neg_binom_logLik = function(y, y.hat, N, minP=1e-3, maxP=1-minP) {
	p = y.hat
	p[p<minP] = minP
	p[p>maxP] = maxP

	q = 1-p
	return(-sum(log(p) * N * y + log(q) * N * (1 - y)))
}


# this function will also work if one (or both) of the inputs are scalars
sum_squared_error = function(y, y.hat) {
	return(sum((y - y.hat)^2))
}

# sum of absolute error rather than sq error
sum_abs_error = function(y, y.hat) {
	return(sum(abs(y - y.hat)))
}


# use this if you need per-item error rates
squared_error = function(y, y.hat) {
	return((y - y.hat)^2)
}

# sqrt(SSE/N)
root_mean_squared_error = function(y, y.hat, n = length(y)) {
	return(sqrt(sum_squared_error(y, y.hat)/n))
}

sd_squared_error = function(y, y.hat) {
	return(sd(squared_error(y, y.hat)))
}

#
# making this a named function for clarity
rmse_total = function(subjs) {
	return(sum(sapply(subjs, function(s) s$rmse)))
}

#
# Corrected RMSE from Massaro
rmse.corr = function(y, y.hat, n = length(y), n.par) {
	return((n/(n - n.par)) * root_mean_squared_error(y, y.hat))
}

#
# Adjusted R2
r2.adj = function(y, y.hat, n = length(y), n.par) {
	sse.c = sum_squared_error(y, y.hat)/(n - n.par - 1)
	sst.c = sum_squared_error(y, mean(y))/(n - 1)

	return(1 - sse.c/sst.c)
}

#
# Correlation
correlation = function(y, y.hat, method = "pearson") {
	return(cor(y, y.hat, method = method))
}

