# Copyright 2007, Wayne G. Shreffler. Free non-comercial use is welcome.
# This is a R analysis script for analysis of data set produced in collaboration with Annebeth Flinterman, Edward Knol and others. 
#R is by the R Foundation for Statistical Computing and is freely available at http://www.R-project.org. R Development Core Team (2006). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. ISBN 3-900051-07-0, URL.

# THIS WILL RUN WITHOUT MODIFICATION ON OS X but will require editing of pipe functions and substitution of quartz functions (e.g. with 'pdf') for use on Windows OS.

# Direct questions or suggestions about this script to the author at wayne.shreffler@mssm.edu.


#functions
#stdev <- function(x) (sqrt(var(x)/length(x)))/mean(x)
#cutoff <- function(x) (mean(x) + 3*stdev(10+x))
current_dir <- readLines(pipe("pwd"))
download.file("http://www.iisinai.org/shreffler/flinterman_data.zip", "flinterman_data.zip")
pipe("unzip 'flinterman_data.zip'", "w")


BATCH <-function(directory, exportname="R_sum.csv", descrptive_header=TRUE, export=TRUE,  skip_num=59) {
dir <- readLines(pipe(paste("ls", directory)))
pathname <- paste(directory, "/", sep="")
FILENUM <- 0
non_csv <- 0
cv <- function(x) 100*((sqrt(var(x)/length(x)))/mean(x)) #define CV
cat("processing . . .\n")
for (step in 1:length(dir)) {
	filename <- paste(pathname, dir[step], sep="")
	shortname <- dir[step]; corename <- substring(shortname, 1, nchar(shortname)-3)
	csv_test <- substring(filename, nchar(filename)-2, nchar(filename))
	boo <- any(as.logical(grep(csv_test, "csv")))
	if (boo) {
		if (FILENUM == 0) FILENUM<-1 else FILENUM <- 2 # used below for define of data_array
		if (descrptive_header) { # option for running manually stripped files
			TABLE <- read.table(filename, skip=skip_num,sep=",",check.names=FALSE, header=TRUE, fill=TRUE, strip.white=TRUE) 
			ROWS <- length(TABLE[[1]])		#GET TABLE LENGTH TO LEAVE OFF LAST ROW
			TABLE <- read.table(filename, skip=skip_num,sep=",",check.names=FALSE, header=TRUE, fill=TRUE, strip.white=TRUE, nrows=ROWS-1)}
		else {TABLE <- read.table(filename, skip=skip_num,sep=",",check.names=FALSE, header=TRUE, fill=TRUE, strip.white=TRUE) }
		cat(paste(filename, "\n"))
		TABLE$Ch1_Median <- log(TABLE$Ch1_Median,10) # log transform
		TABLE$Ch1_Median <- TABLE$Ch1_Median/as.numeric(summary((TABLE$Ch1_Median))[3]) #normalize
		TABLE$Ch1_SignalNoiseRatio < log(TABLE$Ch1_SignalNoiseRatio,10)
		TABLE$Ch1_SignalNoiseRatio <- TABLE$Ch1_SignalNoiseRatio/as.numeric(summary((TABLE$Ch1_SignalNoiseRatio))[3])
		TABLE$Ch2_Median <- log(TABLE$Ch2_Median,10) # log transform
		TABLE$Ch2_Median <- TABLE$Ch2_Median/as.numeric(summary((TABLE$Ch2_Median))[3]) #normalize
		TABLE$Ch2_SignalNoiseRatio < log(TABLE$Ch2_SignalNoiseRatio,10)
		TABLE$Ch2_SignalNoiseRatio <- TABLE$Ch2_SignalNoiseRatio/as.numeric(summary((TABLE$Ch2_SignalNoiseRatio))[3]) #normalize
		tablef <- factor(TABLE$Name) #create factor levels
		# IgE data (channel 1)
		E_sum_I  <- tapply(TABLE$Ch1_Median, tablef, mean)
		E_sum_SNR <- tapply(TABLE$Ch1_SignalNoiseRatio, tablef, mean)
		E_sum_CV <- tapply(TABLE$Ch1_Median, tablef, cv)
		E_sum_N <- tapply(TABLE$Ch1_Median, tablef, length)
		# IgG4 data (channel 2)
		G_sum_I  <- tapply(TABLE$Ch2_Median, tablef, mean)
		G_sum_SNR <- tapply(TABLE$Ch2_SignalNoiseRatio, tablef, mean)
		G_sum_CV <- tapply(TABLE$Ch2_Median, tablef, cv)
		G_sum_N <- tapply(TABLE$Ch2_Median, tablef, length)
		# ratio data
		EtoG_I <- E_sum_I/ G_sum_I
		EtoG_SNR <- E_sum_SNR/ G_sum_SNR
		#quartz(); plot(sum_I, sum_CV, main=paste(corename, "raw data"))
		data_sum <- data.frame(subject=corename,element=row.names(E_sum_I),IgE_int=E_sum_I, IgE_SNR=E_sum_SNR, E_CV=E_sum_CV, E_N=E_sum_N, IgG_int=G_sum_I, IgG_SNR=G_sum_SNR, G_CV=G_sum_CV, G_N=G_sum_N, Ratio_I=EtoG_I, Ratio_SNR=EtoG_SNR)
		cluster_sum <- data.frame(Ratio_SNR=EtoG_SNR)
		if (FILENUM==1) {data_array <<- data_sum}
		else {data_array <<- rbind(data_array, data_sum)}		}
	else non_csv <- non_csv + 1	
	}
cat(paste("total number of files:", paste(step, "\n")))
cat(paste("total number of csv files:", paste(step - non_csv, "\n")))
#g <<- ggobi(data_array)
if (export) {
	exportpath <- paste(readLines(pipe("pwd")),"/",sep="")
	write.csv(data_array, paste(exportpath, exportname, sep = ""))
	cat(paste("writing file", paste(exportpath, paste(exportname, "\n"), sep="")))
	}
}

BATCH(paste(current_dir, "/flinterman_data/VISIT_1", sep=""), exportname="flinterman_data/V1_DATA", skip_num=60)
BATCH(paste(current_dir, "/flinterman_data/Negative_controls", sep=""), exportname="flinterman_data/CTRL_DATA", skip_num=60)
v1 <- read.csv("flinterman_data/V1_DATA")
norm <- read.csv("flinterman_data/CTRL_DATA")
v1.clean <- subset(v1, E_CV<5); v1.clean <- subset(v1.clean, E_N == 4) #low CV and peptide elements only
norm.clean <- subset(norm, E_CV<5); norm.clean <- subset(norm.clean, E_N == 4) #low CV and peptide elements only

#now run loops to generate 'near neighbor' normalization
v1.nearE <- array(); norm.nearE <- array(); v1.nearG <- array(); norm.nearG <- array()
for (x in 1:length(v1.clean$IgE_SNR)){v1.nearE[x]<-median(c(v1.clean$IgE_SNR[x-1],v1.clean$IgE_SNR[x],v1.clean$IgE_SNR[x+1]))}
 v1.nearE[length(v1.nearE)] <- 1 # remove NA from last value
for (x in 1:length(v1.clean$IgG_SNR)){v1.nearG[x]<-median(c(v1.clean$IgG_SNR[x-1],v1.clean$IgG_SNR[x],v1.clean$IgG_SNR[x+1]))}
 v1.nearG[length(v1.nearG)] <- 1 # remove NA from last value
for (x in 1:length(norm.clean$IgE_SNR)){norm.nearE[x]<-median(c(norm.clean$IgE_SNR[x-1],norm.clean$IgE_SNR[x],norm.clean$IgE_SNR[x+1]))}
 norm.nearE[length(norm.nearE)] <- 1 # remove NA from last value
for (x in 1:length(norm.clean$IgG_SNR)){norm.nearG[x]<-median(c(norm.clean$IgG_SNR[x-1],norm.clean$IgG_SNR[x],norm.clean$IgG_SNR[x+1]))}
 norm.nearG[length(norm.nearG)] <- 1 # remove NA from last value

#create data frames
v1.near.EnG <- data.frame(element = v1.clean$element, subject = v1.clean$subject, E_SNR = v1.nearE, G_SNR = v1.nearG)
norm.near.EnG <-  data.frame(element = norm.clean$element, subject = norm.clean$subject, E_SNR = norm.nearE, G_SNR = norm.nearG)

#summary plot showing mean signal and cutoff value using log transformed SNR
quartz()
elementf <- factor(v1.clean$element)
v1.Esum.mean.419 <- tapply(v1.clean$IgE_SNR, elementf, mean)
plot(log(v1.Esum.mean.419, 2), type="n", ylim=c(0,6), ylab="IgE SNR",main="all peptides low CV data")
lines(log(v1.Esum.mean.419, 2),col=4)
elementf <- factor(norm.clean$element)
norm.Esum.mean.419 <- tapply(norm.clean$IgE_SNR, elementf, mean)
lines(log(norm.Esum.mean.419, 2), col=2)


#summary plot showing mean signal and cutoff value using log transformed 'nearby' SNR
quartz()
elementf <- factor(v1.near.EnG$element)
v1.Esum.mean.nearby <- tapply(v1.near.EnG$E_SNR, elementf, mean)
v1.Gsum.mean.nearby <- tapply(v1.near.EnG$G_SNR, elementf, mean)
plot(log(v1.Esum.mean.nearby, 2), type="n", ylim=c(-1,3), ylab="Log Signal Noise Ratio",main="nearby analysis low CV data")
lines(log(v1.Esum.mean.nearby, 2), col=4)
#display IgG4 below axis
lines(-1*log(v1.Gsum.mean.nearby, 2), col=3)
elementf <- factor(norm.near.EnG$element)
norm.Esum.mean.nearby <- tapply(norm.near.EnG$E_SNR, elementf, mean)
norm.Gsum.mean.nearby <- tapply(norm.near.EnG$G_SNR, elementf, mean)
lines(log(norm.Esum.mean.nearby, 2), col=2); lines(-1*log(norm.Gsum.mean.nearby, 2), col=2)
abline(v=c(205.5, 254.5), lty=3)
text(102,2.8,"Ara h1", cex=2)
text(230, 2.8,"Ara h2", cex=2)
text(330, 2.8,"Ara h3", cex=2)
text(-9,-0.9,"IgG4"); text(-10,2.9,"IgE")

#set cutoff for IgE and IgG
Ecutoff <- quantile(log(norm.near.EnG$E_SNR,2), 0.995, type=8)
abline(h=as.numeric(Ecutoff), lty=3)
Gcutoff <- quantile(log(norm.near.EnG$G_SNR,2), 0.995, type=8)
abline(h=-1*as.numeric(Gcutoff), lty=3)
quartz(); hist(log(norm.near.EnG$E_SNR,2)); abline(v=as.numeric(Ecutoff), lty=3)
quartz(); hist(log(norm.near.EnG$G_SNR,2)); abline(v=as.numeric(Gcutoff), lty=3)
v1.E.logical <- array(); v1.G.logical <- array()
for (x in 1:length(v1.near.EnG$E_SNR)) {
	v1.E.logical[x] <- as.numeric(log(v1.near.EnG$E_SNR[x],2)) > as.numeric(Ecutoff)
	v1.G.logical[x] <- as.numeric(log(v1.near.EnG$G_SNR[x],2)) > as.numeric(Gcutoff)	}
norm.E.logical <- array(); norm.G.logical <- array()
for (x in 1:length(norm.near.EnG$E_SNR)) {
	norm.E.logical[x] <- as.numeric(log(norm.near.EnG$E_SNR[x],2)) > as.numeric(Ecutoff)
	norm.G.logical[x] <- as.numeric(log(norm.near.EnG$G_SNR[x],2)) > as.numeric(Gcutoff)	}
		
#create EvG plots of normals and V1
quartz()
double_pos <- sum(as.numeric(norm.G.logical & norm.E.logical))
E.pos <- sum(as.numeric(norm.E.logical)) - double_pos
G.pos <- sum(as.numeric(norm.G.logical)) - double_pos
plot(log(norm.near.EnG$E_SNR,2), log(norm.near.EnG$G_SNR,2), xlim=c(-1,6), ylim=c(-1,6), xlab="log IgE SNR", ylab="log IgG SNR", main="Normal controls n=6"); abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3)
text(5.5,-0.5, round(E.pos/length(norm.E.logical) * 100, 2), cex=1.5)
text(5.5,5.5, round(double_pos/length(norm.E.logical) * 100, 2), cex=1.5)
text(-0.5,5.5, round(G.pos/length(norm.E.logical) * 100, 2), cex=1.5)

quartz()
double_pos <- sum(as.numeric(v1.G.logical & v1.E.logical))
E.pos <- sum(as.numeric(v1.E.logical)) - double_pos
G.pos <- sum(as.numeric(v1.G.logical)) - double_pos
plot(log(v1.near.EnG$E_SNR,2), log(v1.near.EnG$G_SNR,2), xlim=c(-1,6), ylim=c(-1,6), xlab="log IgE SNR", ylab="log IgG SNR", main="Peanut sensitized n=24")
abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3)
text(5.5,-0.5, round(E.pos/length(v1.E.logical) * 100, 2), cex=1.5)
text(5.5,5.5, round(double_pos/length(v1.E.logical) * 100, 2), cex=1.5)
text(-0.5,5.5, round(G.pos/length(v1.E.logical) * 100, 2), cex=1.5)


#create logical data.frame and plot for cutoff sum
#visit 1

v1.logical <- data.frame(element = v1.near.EnG$element, subject = v1.near.EnG$subject, Epos = v1.E.logical, Gpos = v1.G.logical)
elementf <- factor(v1.logical$element)
v1.logical.Esum <- tapply(v1.logical$Epos, elementf, sum)
quartz(); plot(v1.logical.Esum/length(levels(v1.logical$subject)), type="h", main="Peanut sensitized IgE", ylab="% positive")
lines(v1.logical.Esum/length(levels(v1.logical$subject)), col=4)
abline(v=c(205.5, 254.5), lty=3)
text(102,2.8,"Ara h1", cex=2)
text(230, 2.8,"Ara h2", cex=2)
text(330, 2.8,"Ara h3", cex=2)
text(-9,-0.9,"IgG4"); text(-10,2.9,"IgE")

v1.logical.Gsum <- tapply(v1.logical$Gpos, elementf, sum)
quartz(); plot(v1.logical.Gsum/length(levels(v1.logical$subject)), type="h", main="Peanut sensitized IgG", ylab="% positive")
lines(v1.logical.Gsum/length(levels(v1.logical$subject)), col=3)
abline(v=c(205.5, 254.5), lty=3)
text(102,2.8,"Ara h1", cex=2)
text(230, 2.8,"Ara h2", cex=2)
text(330, 2.8,"Ara h3", cex=2)
text(-9,-0.9,"IgG4"); text(-10,2.9,"IgE")

#normal controls

norm.logical <- data.frame(element = norm.near.EnG$element, subject = norm.near.EnG$subject, Epos = norm.E.logical, Gpos = norm.G.logical)
elementf <- factor(norm.logical$element)
norm.logical.Esum <- tapply(norm.logical$Epos, elementf, sum)
quartz(); plot(norm.logical.Esum/length(levels(norm.logical$subject)), type="h", main="Normals IgE", ylab="% positive")
lines(norm.logical.Esum/length(levels(norm.logical$subject)), col=4)

norm.logical.Gsum <- tapply(norm.logical$Gpos, elementf, sum)
quartz(); plot(norm.logical.Gsum/length(levels(norm.logical$subject)), type="h", main="Normals IgG", ylab="% positive")
lines(norm.logical.Gsum/length(levels(norm.logical$subject)), col=3)

#calculate percent of patients recognizing at least one epitope of Arah1, Arah2 or Arah3
element <- v1.logical$element
Arah1 <- element %in% grep("Ara_h_1", as.character(element), value=TRUE)
A1.logical <- subset(v1.logical, Arah1)
subjectf <- A1.logical$subject
A1.percent.patients <- tapply(A1.logical$Epos, subjectf, sum)

Arah2 <- element %in% grep("Ara_h_2", as.character(element), value=TRUE)
A2.logical <- subset(v1.logical, Arah2)
subjectf <- A2.logical$subject
A2.percent.patients <- tapply(A2.logical$Epos, subjectf, sum)

Arah3 <- element %in% grep("Ara_h_3", as.character(element), value=TRUE)
A3.logical <- subset(v1.logical, Arah3)
#A3.logical <- subset(A3.logical, element != "Ara_h_3.128")
#A3.logical <- subset(A3.logical, element != "Ara_h_3.129")
#A3.logical <- subset(A3.logical, element != "Ara_h_3.130")
#A3.logical <- subset(A3.logical, element != "Ara_h_3.131")
#A3.logical <- subset(A3.logical, element != "Ara_h_3.132")
subjectf <- A3.logical$subject
A3.percent.patients <- tapply(A3.logical$Epos, subjectf, sum)


#explore relationship between epitopes and reactivity and create bins by epitope num and by combo score

subjectf <- factor(v1.logical$subject)
E.logical.sum <- tapply(v1.logical$Epos, subjectf, sum)
G.logical.sum <- tapply(v1.logical$Gpos, subjectf, sum)
patient.data <- read.csv(paste(current_dir, "/flinterman_data/score_key.csv", sep=""))
v1.patient <- subset(patient.data, VISIT == "v1")
v1.patient.sort <- v1.patient[ do.call(order, v1.patient) ,] #sort by subject

E.epitope.num <- data.frame(subject=names(E.logical.sum), E.epitope=E.logical.sum)
G.epitope.num <- data.frame(subject=names(G.logical.sum), G.epitope=G.logical.sum)
v1.E.epitope.num.sort <- E.epitope.num[ do.call(order, E.epitope.num) ,]
v1.G.epitope.num.sort <- G.epitope.num[ do.call(order, G.epitope.num) ,]
v1.patient <- cbind(v1.patient.sort, E.epitope.num=v1.E.epitope.num.sort$E.epitope, G.epitope.num=v1.G.epitope.num.sort$G.epitope)
quartz(); plot(v1.patient$E.epitope.num, v1.patient$COMBO, main="Reaction severity v. epitope diversity",ylab="Reaction score", xlab="# positive epitopes")

q1.epitope <- subset(v1.patient, E.epitope.num <= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.25)))
epitopeQ <- data.frame(Q=1, epitopes=q1.epitope$E.epitope.num, combo=q1.epitope$COMBO)
q2.epitope <- subset(v1.patient, E.epitope.num <= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.5)) & E.epitope.num >= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.25)))
epitopeQ <- rbind(epitopeQ, data.frame(Q=2, epitopes=q2.epitope$E.epitope.num, combo=q2.epitope$COMBO))
q3.epitope <- subset(v1.patient, E.epitope.num <= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.75)) & E.epitope.num >= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.5)))
epitopeQ <- rbind(epitopeQ, data.frame(Q=3, epitopes=q3.epitope$E.epitope.num, combo=q3.epitope$COMBO))
q4.epitope <- subset(v1.patient, E.epitope.num >= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.75)))
epitopeQ <- rbind(epitopeQ, data.frame(Q=4, epitopes=q4.epitope$E.epitope.num, combo=q4.epitope$COMBO))

quartz(); boxplot(epitopeQ$combo ~ epitopeQ$Q, ylim=c(0,15), main="Reaction score by epitope quartile", ylab="Combo reaction score")
points(epitopeQ$combo ~ epitopeQ$Q, col=4, pch=19, cex=2)
epitope_extremes <- subset(epitopeQ, Q == 1 | Q == 4)

wilcox.test(subset(epitope_extremes,Q==1)$combo, subset(epitope_extremes,Q==4)$combo, alternative = "l")$p.value
text(4,14,"*",cex=2)

#comparison using sIGE
q1.sIgE <- subset(v1.patient, sIgE <= as.numeric(quantile(v1.patient$sIgE, prob = 0.25)))
sIgE_Q <- data.frame(Q=1, sIgE=q1.sIgE$sIgE, combo=q1.sIgE$COMBO)
q2.sIgE <- subset(v1.patient, sIgE <= as.numeric(quantile(v1.patient$sIgE, prob = 0.5)) & sIgE >= as.numeric(quantile(v1.patient$sIgE, prob = 0.25)))
sIgE_Q <- rbind(sIgE_Q, data.frame(Q=2, sIgE=q2.sIgE$sIgE, combo=q2.sIgE$COMBO))
q3.sIgE <- subset(v1.patient, sIgE <= as.numeric(quantile(v1.patient$sIgE, prob = 0.75)) & sIgE >= as.numeric(quantile(v1.patient$sIgE, prob = 0.5)))
sIgE_Q <- rbind(sIgE_Q, data.frame(Q=3, sIgE=q3.sIgE$sIgE, combo=q3.sIgE$COMBO))
q4.sIgE <- subset(v1.patient, sIgE >= as.numeric(quantile(v1.patient$sIgE, prob = 0.75)))
sIgE_Q <- rbind(sIgE_Q, data.frame(Q=4, sIgE=q4.sIgE$sIgE, combo=q4.sIgE$COMBO))

quartz(); boxplot(sIgE_Q$combo ~ sIgE_Q$Q, ylim=c(0,15), main="Reaction score by sIgE quartile", ylab="Combo reaction score")
points(sIgE_Q$combo ~ sIgE_Q$Q, col=4, pch=19, cex=2)
sIgE_extremes <- subset(sIgE_Q, Q == 1 | Q == 4)

wilcox.test(subset(sIgE_extremes,Q==1)$combo, subset(sIgE_extremes,Q==4)$combo, alternative = "l")$p.value

#quartile by combo score

q1.combo <- subset(v1.patient, COMBO <= as.numeric(quantile(v1.patient$COMBO, prob = 0.25)))
q1.near.EnG <- subset(v1.near.EnG, subject == as.character(q1.combo$subject[1]))
q1.logical <- subset(v1.logical, subject == as.character(q1.combo$subject[1]))
for (x in 2:length(q1.combo$subject)) {
	q1.near.EnG <- rbind(q1.near.EnG, subset(v1.near.EnG, subject == as.character(q1.combo$subject[x])))
	q1.logical <- rbind(q1.logical, subset(v1.logical, subject == as.character(q1.combo$subject[x]))) }
quartz(); plot(log(q1.near.EnG$E_SNR,2), log(q1.near.EnG$G_SNR,2), main="Q1 reaction severity", xlab="log IgE SNR", ylab="log IgG SNR", xlim=c(-1,6), ylim=c(-1,6))
double_pos <- sum(as.numeric(q1.logical$Epos & q1.logical$Gpos))
E.pos <- sum(as.numeric(q1.logical$Epos)) - double_pos
G.pos <- sum(as.numeric(q1.logical$Gpos)) - double_pos
abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3)
text(5.5,-0.5, round(E.pos/length(q1.logical$Epos) * 100, 2), cex=1.5)
text(5.5,5.5, round(double_pos/length(q1.logical$Epos) * 100, 2), cex=1.5)
text(-0.5,5.5, round(G.pos/length(q1.logical$Epos) * 100, 2), cex=1.5)

q2.combo <- subset(v1.patient, COMBO <= as.numeric(quantile(v1.patient$COMBO, prob = 0.5)) & COMBO >= as.numeric(quantile(v1.patient$COMBO, prob = 0.25)))
q2.near.EnG <- subset(v1.near.EnG, subject == as.character(q2.combo$subject[1]))
q2.logical <- subset(v1.logical, subject == as.character(q2.combo$subject[1]))
for (x in 2:length(q2.combo$subject)) {
	q2.near.EnG <- rbind(q2.near.EnG, subset(v1.near.EnG, subject == as.character(q2.combo$subject[x]))) 
	q2.logical <- rbind(q2.logical, subset(v1.logical, subject == as.character(q2.combo$subject[x]))) }
quartz(); plot(log(q2.near.EnG$E_SNR,2), log(q2.near.EnG$G_SNR,2), main="Q2 reaction severity", xlab="log IgE SNR", ylab="log IgG SNR", xlim=c(-1,6), ylim=c(-1,6))
double_pos <- sum(as.numeric(q2.logical$Epos & q2.logical$Gpos))
E.pos <- sum(as.numeric(q2.logical$Epos)) - double_pos
G.pos <- sum(as.numeric(q2.logical$Gpos)) - double_pos
abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3)
text(5.5,-0.5, round(E.pos/length(q2.logical$Epos) * 100, 2), cex=1.5)
text(5.5,5.5, round(double_pos/length(q2.logical$Epos) * 100, 2), cex=1.5)
text(-0.5,5.5, round(G.pos/length(q2.logical$Epos) * 100, 2), cex=1.5)


q3.combo <- subset(v1.patient, COMBO <= as.numeric(quantile(v1.patient$COMBO, prob = 0.75)) & COMBO >= as.numeric(quantile(v1.patient$COMBO, prob = 0.5)))
q3.near.EnG <- subset(v1.near.EnG, subject == as.character(q3.combo$subject[1]))
q3.logical <- subset(v1.logical, subject == as.character(q3.combo$subject[1]))
for (x in 2:length(q3.combo$subject)) {
	q3.near.EnG <- rbind(q3.near.EnG, subset(v1.near.EnG, subject == as.character(q3.combo$subject[x]))) 
	q3.logical <- rbind(q3.logical, subset(v1.logical, subject == as.character(q3.combo$subject[x]))) }
quartz(); plot(log(q3.near.EnG$E_SNR,2), log(q3.near.EnG$G_SNR,2), main="Q3 reaction severity", xlab="log IgE SNR", ylab="log IgG SNR", xlim=c(-1,6), ylim=c(-1,6))
double_pos <- sum(as.numeric(q3.logical$Epos & q3.logical$Gpos))
E.pos <- sum(as.numeric(q3.logical$Epos)) - double_pos
G.pos <- sum(as.numeric(q3.logical$Gpos)) - double_pos
abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3)
text(5.5,-0.5, round(E.pos/length(q3.logical$Epos) * 100, 2), cex=1.5)
text(5.5,5.5, round(double_pos/length(q3.logical$Epos) * 100, 2), cex=1.5)
text(-0.5,5.5, round(G.pos/length(q3.logical$Epos) * 100, 2), cex=1.5)


q4.combo <- subset(v1.patient, COMBO >= as.numeric(quantile(v1.patient$COMBO, prob = 0.75)))
q4.near.EnG <- subset(v1.near.EnG, subject == as.character(q4.combo$subject[1]))
q4.logical <- subset(v1.logical, subject == as.character(q3.combo$subject[1]))
for (x in 2:length(q4.combo$subject)) {
	q4.near.EnG <- rbind(q4.near.EnG, subset(v1.near.EnG, subject == as.character(q4.combo$subject[x]))) 
	q4.logical <- rbind(q4.logical, subset(v1.logical, subject == as.character(q4.combo$subject[x]))) }
quartz(); plot(log(q4.near.EnG$E_SNR,2), log(q4.near.EnG$G_SNR,2), main="Q4 reaction severity", xlab="log IgE SNR", ylab="log IgG SNR",xlim=c(-1,6), ylim=c(-1,6))
double_pos <- sum(as.numeric(q4.logical$Epos & q4.logical$Gpos))
E.pos <- sum(as.numeric(q4.logical$Epos)) - double_pos
G.pos <- sum(as.numeric(q4.logical$Gpos)) - double_pos
abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3)
text(5.5,-0.5, round(E.pos/length(q4.logical$Epos) * 100, 2), cex=1.5)
text(5.5,5.5, round(double_pos/length(q4.logical$Epos) * 100, 2), cex=1.5)
text(-0.5,5.5, round(G.pos/length(q4.logical$Epos) * 100, 2), cex=1.5)

# V4 data

BATCH(paste(current_dir, "/flinterman_data/VISIT_4", sep=""), exportname="flinterman_data/V4_DATA", skip_num=60)
v4 <- read.csv("flinterman_data/V4_DATA")
v4.clean <- subset(v4, E_CV<5); v4.clean <- subset(v4.clean, E_N == 4) #low CV and peptide elements only
norm.clean <- subset(norm, E_CV<5); norm.clean <- subset(norm.clean, E_N == 4) #low CV and peptide elements only
#now run loops to generate 'near neighbor' normalization
v4.nearE <- array(); norm.nearE <- array(); v4.nearG <- array(); norm.nearG <- array()
for (x in 1:length(v4.clean$IgE_SNR)){v4.nearE[x]<-median(c(v4.clean$IgE_SNR[x-1],v4.clean$IgE_SNR[x],v4.clean$IgE_SNR[x+1]))}
 v4.nearE[length(v4.nearE)] <- 1 # remove NA from last value
for (x in 1:length(v4.clean$IgG_SNR)){v4.nearG[x]<-median(c(v4.clean$IgG_SNR[x-1],v4.clean$IgG_SNR[x],v4.clean$IgG_SNR[x+1]))}
 v4.nearG[length(v4.nearG)] <- 1 # remove NA from last value

#create data frame

v4.near.EnG <- data.frame(element = v4.clean$element, subject = v4.clean$subject, E_SNR = v4.nearE, G_SNR = v4.nearG)

#summary plot showing mean signal and cutoff value using log transformed 'nearby' SNR

quartz()
elementf <- factor(v4.near.EnG$element)
v4.Esum.mean.nearby <- tapply(v4.near.EnG$E_SNR, elementf, mean)
v4.Gsum.mean.nearby <- tapply(v4.near.EnG$G_SNR, elementf, mean)
plot(log(v4.Esum.mean.nearby, 2), type="n", ylim=c(-1,3), ylab="Log Signal Noise Ratio",main="nearby analysis low CV data")
lines(log(v4.Esum.mean.nearby, 2), col=4)
#display IgG4 below axis
lines(-1*log(v4.Gsum.mean.nearby, 2), col=3)

#create logical data.frame and plot for cutoff sum
#visit 4
v4.E.logical <- array(); v4.G.logical <- array()
for (x in 1:length(v4.near.EnG$E_SNR)) {
	v4.E.logical[x] <- as.numeric(log(v4.near.EnG$E_SNR[x],2)) > as.numeric(Ecutoff)
	v4.G.logical[x] <- as.numeric(log(v4.near.EnG$G_SNR[x],2)) > as.numeric(Gcutoff)	}
v4.logical <- data.frame(element = v4.near.EnG$element, subject = v4.near.EnG$subject, Epos = v4.E.logical, Gpos = v4.G.logical)
elementf <- factor(v4.logical$element)
v4.logical.Esum <- tapply(v4.logical$Epos, elementf, sum)
quartz(); plot(v4.logical.Esum/length(levels(v4.logical$subject)), type="h", main="Peanut sensitized IgE", ylab="% positive")
lines(v4.logical.Esum/length(levels(v4.logical$subject)), col=4)

v4.logical.Gsum <- tapply(v4.logical$Gpos, elementf, sum)
quartz(); plot(v4.logical.Gsum/length(levels(v4.logical$subject)), type="h", main="Peanut sensitized IgG", ylab="% positive")
lines(v4.logical.Gsum/length(levels(v4.logical$subject)), col=3)

#create matrices for contour plots
v1v4E <- array(0, dim=c(21,20))
v1v4G <- array(0, dim=c(21,20))
for (x in 1:length(v1.logical.Esum)) {
	v1v4E[as.numeric(v1.logical.Esum[x])+1, as.numeric(v4.logical.Esum[x])+1] <- v1v4E[as.numeric(v1.logical.Esum[x])+1, as.numeric(v4.logical.Esum[x])+1] + 1
	v1v4G[as.numeric(v1.logical.Gsum[x])+1, as.numeric(v4.logical.Gsum[x])+1] <- v1v4G[as.numeric(v1.logical.Gsum[x])+1, as.numeric(v4.logical.Gsum[x])+1] + 1
	}
quartz(); contour(x=seq(0, 21,len=nrow(v1v4E)), y=seq(0, 20, len=ncol(v1v4E)), nlevels=as.numeric(max(v1v4E)), v1v4E, lwd=2, drawlabels = FALSE, col=4, main="Stability of IgE epitopes over 20 months", ylab="visit 4 sum positive by epitope", xlab="visit 1 sum positive by epitope"); abline(0,1, col=4)
quartz(); contour(x=seq(0, 21,len=nrow(v1v4G)), y=seq(0, 20, len=ncol(v1v4G)), nlevels=as.numeric(max(v1v4G)), v1v4G, lwd=2, drawlabels = FALSE, col=3, main="Stability of IgG epitopes over 20 months", ylab="visit 4 sum positive by epitope", xlab="visit 1 sum positive by epitope"); abline(0,1, col=3)
chisq.test(v1.logical.Esum, v4.logical.Esum, simulate.p.value=TRUE); chisq.test(v1.logical.Esum, rnorm(419), simulate.p.value=TRUE)
chisq.test(v1.logical.Gsum, v4.logical.Gsum, simulate.p.value=TRUE); chisq.test(v1.logical.Esum, rnorm(419), simulate.p.value=TRUE)

# create heatmap image

# first need to create column data.frame
subjectf <- factor(v1.near.EnG$subject)
elementf <- factor(v1.near.EnG$element)
tempE <- by(v1.near.EnG$E_SNR, list(subjectf, elementf), sum)
tempG <- by(v1.near.EnG$G_SNR, list(subjectf, elementf), sum)
temp <- log(tempE/tempG, 2)
v1.cluster <- array(as.numeric(temp), dim=c(24, 419))
row.names(v1.cluster) <- dimnames(temp)[[1]]
names(v1.cluster) <- dimnames(temp)[[2]]
quartz(); heatmap(as.matrix(v1.cluster), Colv=NA, cexCol = 0.1 + .1/log10(419), col=topo.colors(20), labCol = names(v1.cluster), scale="none")
rm("temp", "tempE", "tempG")
#end script