# Copyright 2007, Wayne G. Shreffler. Free non-comercial use is welcome. # This is a R analysis script for analysis of data set produced in collaboration with Annebeth Flinterman, Edward Knol and others. #R is by the R Foundation for Statistical Computing and is freely available at http://www.R-project.org. R Development Core Team (2006). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. ISBN 3-900051-07-0, URL. # THIS WILL RUN WITHOUT MODIFICATION ON OS X but will require editing of pipe functions and substitution of quartz functions (e.g. with 'pdf') for use on Windows OS. # Direct questions or suggestions about this script to the author at wayne.shreffler@mssm.edu. #functions #stdev <- function(x) (sqrt(var(x)/length(x)))/mean(x) #cutoff <- function(x) (mean(x) + 3*stdev(10+x)) current_dir <- readLines(pipe("pwd")) download.file("http://www.iisinai.org/shreffler/flinterman_data.zip", "flinterman_data.zip") pipe("unzip 'flinterman_data.zip'", "w") BATCH <-function(directory, exportname="R_sum.csv", descrptive_header=TRUE, export=TRUE, skip_num=59) { dir <- readLines(pipe(paste("ls", directory))) pathname <- paste(directory, "/", sep="") FILENUM <- 0 non_csv <- 0 cv <- function(x) 100*((sqrt(var(x)/length(x)))/mean(x)) #define CV cat("processing . . .\n") for (step in 1:length(dir)) { filename <- paste(pathname, dir[step], sep="") shortname <- dir[step]; corename <- substring(shortname, 1, nchar(shortname)-3) csv_test <- substring(filename, nchar(filename)-2, nchar(filename)) boo <- any(as.logical(grep(csv_test, "csv"))) if (boo) { if (FILENUM == 0) FILENUM<-1 else FILENUM <- 2 # used below for define of data_array if (descrptive_header) { # option for running manually stripped files TABLE <- read.table(filename, skip=skip_num,sep=",",check.names=FALSE, header=TRUE, fill=TRUE, strip.white=TRUE) ROWS <- length(TABLE[[1]]) #GET TABLE LENGTH TO LEAVE OFF LAST ROW TABLE <- read.table(filename, skip=skip_num,sep=",",check.names=FALSE, header=TRUE, fill=TRUE, strip.white=TRUE, nrows=ROWS-1)} else {TABLE <- read.table(filename, skip=skip_num,sep=",",check.names=FALSE, header=TRUE, fill=TRUE, strip.white=TRUE) } cat(paste(filename, "\n")) TABLE$Ch1_Median <- log(TABLE$Ch1_Median,10) # log transform TABLE$Ch1_Median <- TABLE$Ch1_Median/as.numeric(summary((TABLE$Ch1_Median))[3]) #normalize TABLE$Ch1_SignalNoiseRatio < log(TABLE$Ch1_SignalNoiseRatio,10) TABLE$Ch1_SignalNoiseRatio <- TABLE$Ch1_SignalNoiseRatio/as.numeric(summary((TABLE$Ch1_SignalNoiseRatio))[3]) TABLE$Ch2_Median <- log(TABLE$Ch2_Median,10) # log transform TABLE$Ch2_Median <- TABLE$Ch2_Median/as.numeric(summary((TABLE$Ch2_Median))[3]) #normalize TABLE$Ch2_SignalNoiseRatio < log(TABLE$Ch2_SignalNoiseRatio,10) TABLE$Ch2_SignalNoiseRatio <- TABLE$Ch2_SignalNoiseRatio/as.numeric(summary((TABLE$Ch2_SignalNoiseRatio))[3]) #normalize tablef <- factor(TABLE$Name) #create factor levels # IgE data (channel 1) E_sum_I <- tapply(TABLE$Ch1_Median, tablef, mean) E_sum_SNR <- tapply(TABLE$Ch1_SignalNoiseRatio, tablef, mean) E_sum_CV <- tapply(TABLE$Ch1_Median, tablef, cv) E_sum_N <- tapply(TABLE$Ch1_Median, tablef, length) # IgG4 data (channel 2) G_sum_I <- tapply(TABLE$Ch2_Median, tablef, mean) G_sum_SNR <- tapply(TABLE$Ch2_SignalNoiseRatio, tablef, mean) G_sum_CV <- tapply(TABLE$Ch2_Median, tablef, cv) G_sum_N <- tapply(TABLE$Ch2_Median, tablef, length) # ratio data EtoG_I <- E_sum_I/ G_sum_I EtoG_SNR <- E_sum_SNR/ G_sum_SNR #quartz(); plot(sum_I, sum_CV, main=paste(corename, "raw data")) data_sum <- data.frame(subject=corename,element=row.names(E_sum_I),IgE_int=E_sum_I, IgE_SNR=E_sum_SNR, E_CV=E_sum_CV, E_N=E_sum_N, IgG_int=G_sum_I, IgG_SNR=G_sum_SNR, G_CV=G_sum_CV, G_N=G_sum_N, Ratio_I=EtoG_I, Ratio_SNR=EtoG_SNR) cluster_sum <- data.frame(Ratio_SNR=EtoG_SNR) if (FILENUM==1) {data_array <<- data_sum} else {data_array <<- rbind(data_array, data_sum)} } else non_csv <- non_csv + 1 } cat(paste("total number of files:", paste(step, "\n"))) cat(paste("total number of csv files:", paste(step - non_csv, "\n"))) #g <<- ggobi(data_array) if (export) { exportpath <- paste(readLines(pipe("pwd")),"/",sep="") write.csv(data_array, paste(exportpath, exportname, sep = "")) cat(paste("writing file", paste(exportpath, paste(exportname, "\n"), sep=""))) } } BATCH(paste(current_dir, "/flinterman_data/VISIT_1", sep=""), exportname="flinterman_data/V1_DATA", skip_num=60) BATCH(paste(current_dir, "/flinterman_data/Negative_controls", sep=""), exportname="flinterman_data/CTRL_DATA", skip_num=60) v1 <- read.csv("flinterman_data/V1_DATA") norm <- read.csv("flinterman_data/CTRL_DATA") v1.clean <- subset(v1, E_CV<5); v1.clean <- subset(v1.clean, E_N == 4) #low CV and peptide elements only norm.clean <- subset(norm, E_CV<5); norm.clean <- subset(norm.clean, E_N == 4) #low CV and peptide elements only #now run loops to generate 'near neighbor' normalization v1.nearE <- array(); norm.nearE <- array(); v1.nearG <- array(); norm.nearG <- array() for (x in 1:length(v1.clean$IgE_SNR)){v1.nearE[x]<-median(c(v1.clean$IgE_SNR[x-1],v1.clean$IgE_SNR[x],v1.clean$IgE_SNR[x+1]))} v1.nearE[length(v1.nearE)] <- 1 # remove NA from last value for (x in 1:length(v1.clean$IgG_SNR)){v1.nearG[x]<-median(c(v1.clean$IgG_SNR[x-1],v1.clean$IgG_SNR[x],v1.clean$IgG_SNR[x+1]))} v1.nearG[length(v1.nearG)] <- 1 # remove NA from last value for (x in 1:length(norm.clean$IgE_SNR)){norm.nearE[x]<-median(c(norm.clean$IgE_SNR[x-1],norm.clean$IgE_SNR[x],norm.clean$IgE_SNR[x+1]))} norm.nearE[length(norm.nearE)] <- 1 # remove NA from last value for (x in 1:length(norm.clean$IgG_SNR)){norm.nearG[x]<-median(c(norm.clean$IgG_SNR[x-1],norm.clean$IgG_SNR[x],norm.clean$IgG_SNR[x+1]))} norm.nearG[length(norm.nearG)] <- 1 # remove NA from last value #create data frames v1.near.EnG <- data.frame(element = v1.clean$element, subject = v1.clean$subject, E_SNR = v1.nearE, G_SNR = v1.nearG) norm.near.EnG <- data.frame(element = norm.clean$element, subject = norm.clean$subject, E_SNR = norm.nearE, G_SNR = norm.nearG) #summary plot showing mean signal and cutoff value using log transformed SNR quartz() elementf <- factor(v1.clean$element) v1.Esum.mean.419 <- tapply(v1.clean$IgE_SNR, elementf, mean) plot(log(v1.Esum.mean.419, 2), type="n", ylim=c(0,6), ylab="IgE SNR",main="all peptides low CV data") lines(log(v1.Esum.mean.419, 2),col=4) elementf <- factor(norm.clean$element) norm.Esum.mean.419 <- tapply(norm.clean$IgE_SNR, elementf, mean) lines(log(norm.Esum.mean.419, 2), col=2) #summary plot showing mean signal and cutoff value using log transformed 'nearby' SNR quartz() elementf <- factor(v1.near.EnG$element) v1.Esum.mean.nearby <- tapply(v1.near.EnG$E_SNR, elementf, mean) v1.Gsum.mean.nearby <- tapply(v1.near.EnG$G_SNR, elementf, mean) plot(log(v1.Esum.mean.nearby, 2), type="n", ylim=c(-1,3), ylab="Log Signal Noise Ratio",main="nearby analysis low CV data") lines(log(v1.Esum.mean.nearby, 2), col=4) #display IgG4 below axis lines(-1*log(v1.Gsum.mean.nearby, 2), col=3) elementf <- factor(norm.near.EnG$element) norm.Esum.mean.nearby <- tapply(norm.near.EnG$E_SNR, elementf, mean) norm.Gsum.mean.nearby <- tapply(norm.near.EnG$G_SNR, elementf, mean) lines(log(norm.Esum.mean.nearby, 2), col=2); lines(-1*log(norm.Gsum.mean.nearby, 2), col=2) abline(v=c(205.5, 254.5), lty=3) text(102,2.8,"Ara h1", cex=2) text(230, 2.8,"Ara h2", cex=2) text(330, 2.8,"Ara h3", cex=2) text(-9,-0.9,"IgG4"); text(-10,2.9,"IgE") #set cutoff for IgE and IgG Ecutoff <- quantile(log(norm.near.EnG$E_SNR,2), 0.995, type=8) abline(h=as.numeric(Ecutoff), lty=3) Gcutoff <- quantile(log(norm.near.EnG$G_SNR,2), 0.995, type=8) abline(h=-1*as.numeric(Gcutoff), lty=3) quartz(); hist(log(norm.near.EnG$E_SNR,2)); abline(v=as.numeric(Ecutoff), lty=3) quartz(); hist(log(norm.near.EnG$G_SNR,2)); abline(v=as.numeric(Gcutoff), lty=3) v1.E.logical <- array(); v1.G.logical <- array() for (x in 1:length(v1.near.EnG$E_SNR)) { v1.E.logical[x] <- as.numeric(log(v1.near.EnG$E_SNR[x],2)) > as.numeric(Ecutoff) v1.G.logical[x] <- as.numeric(log(v1.near.EnG$G_SNR[x],2)) > as.numeric(Gcutoff) } norm.E.logical <- array(); norm.G.logical <- array() for (x in 1:length(norm.near.EnG$E_SNR)) { norm.E.logical[x] <- as.numeric(log(norm.near.EnG$E_SNR[x],2)) > as.numeric(Ecutoff) norm.G.logical[x] <- as.numeric(log(norm.near.EnG$G_SNR[x],2)) > as.numeric(Gcutoff) } #create EvG plots of normals and V1 quartz() double_pos <- sum(as.numeric(norm.G.logical & norm.E.logical)) E.pos <- sum(as.numeric(norm.E.logical)) - double_pos G.pos <- sum(as.numeric(norm.G.logical)) - double_pos plot(log(norm.near.EnG$E_SNR,2), log(norm.near.EnG$G_SNR,2), xlim=c(-1,6), ylim=c(-1,6), xlab="log IgE SNR", ylab="log IgG SNR", main="Normal controls n=6"); abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3) text(5.5,-0.5, round(E.pos/length(norm.E.logical) * 100, 2), cex=1.5) text(5.5,5.5, round(double_pos/length(norm.E.logical) * 100, 2), cex=1.5) text(-0.5,5.5, round(G.pos/length(norm.E.logical) * 100, 2), cex=1.5) quartz() double_pos <- sum(as.numeric(v1.G.logical & v1.E.logical)) E.pos <- sum(as.numeric(v1.E.logical)) - double_pos G.pos <- sum(as.numeric(v1.G.logical)) - double_pos plot(log(v1.near.EnG$E_SNR,2), log(v1.near.EnG$G_SNR,2), xlim=c(-1,6), ylim=c(-1,6), xlab="log IgE SNR", ylab="log IgG SNR", main="Peanut sensitized n=24") abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3) text(5.5,-0.5, round(E.pos/length(v1.E.logical) * 100, 2), cex=1.5) text(5.5,5.5, round(double_pos/length(v1.E.logical) * 100, 2), cex=1.5) text(-0.5,5.5, round(G.pos/length(v1.E.logical) * 100, 2), cex=1.5) #create logical data.frame and plot for cutoff sum #visit 1 v1.logical <- data.frame(element = v1.near.EnG$element, subject = v1.near.EnG$subject, Epos = v1.E.logical, Gpos = v1.G.logical) elementf <- factor(v1.logical$element) v1.logical.Esum <- tapply(v1.logical$Epos, elementf, sum) quartz(); plot(v1.logical.Esum/length(levels(v1.logical$subject)), type="h", main="Peanut sensitized IgE", ylab="% positive") lines(v1.logical.Esum/length(levels(v1.logical$subject)), col=4) abline(v=c(205.5, 254.5), lty=3) text(102,2.8,"Ara h1", cex=2) text(230, 2.8,"Ara h2", cex=2) text(330, 2.8,"Ara h3", cex=2) text(-9,-0.9,"IgG4"); text(-10,2.9,"IgE") v1.logical.Gsum <- tapply(v1.logical$Gpos, elementf, sum) quartz(); plot(v1.logical.Gsum/length(levels(v1.logical$subject)), type="h", main="Peanut sensitized IgG", ylab="% positive") lines(v1.logical.Gsum/length(levels(v1.logical$subject)), col=3) abline(v=c(205.5, 254.5), lty=3) text(102,2.8,"Ara h1", cex=2) text(230, 2.8,"Ara h2", cex=2) text(330, 2.8,"Ara h3", cex=2) text(-9,-0.9,"IgG4"); text(-10,2.9,"IgE") #normal controls norm.logical <- data.frame(element = norm.near.EnG$element, subject = norm.near.EnG$subject, Epos = norm.E.logical, Gpos = norm.G.logical) elementf <- factor(norm.logical$element) norm.logical.Esum <- tapply(norm.logical$Epos, elementf, sum) quartz(); plot(norm.logical.Esum/length(levels(norm.logical$subject)), type="h", main="Normals IgE", ylab="% positive") lines(norm.logical.Esum/length(levels(norm.logical$subject)), col=4) norm.logical.Gsum <- tapply(norm.logical$Gpos, elementf, sum) quartz(); plot(norm.logical.Gsum/length(levels(norm.logical$subject)), type="h", main="Normals IgG", ylab="% positive") lines(norm.logical.Gsum/length(levels(norm.logical$subject)), col=3) #calculate percent of patients recognizing at least one epitope of Arah1, Arah2 or Arah3 element <- v1.logical$element Arah1 <- element %in% grep("Ara_h_1", as.character(element), value=TRUE) A1.logical <- subset(v1.logical, Arah1) subjectf <- A1.logical$subject A1.percent.patients <- tapply(A1.logical$Epos, subjectf, sum) Arah2 <- element %in% grep("Ara_h_2", as.character(element), value=TRUE) A2.logical <- subset(v1.logical, Arah2) subjectf <- A2.logical$subject A2.percent.patients <- tapply(A2.logical$Epos, subjectf, sum) Arah3 <- element %in% grep("Ara_h_3", as.character(element), value=TRUE) A3.logical <- subset(v1.logical, Arah3) #A3.logical <- subset(A3.logical, element != "Ara_h_3.128") #A3.logical <- subset(A3.logical, element != "Ara_h_3.129") #A3.logical <- subset(A3.logical, element != "Ara_h_3.130") #A3.logical <- subset(A3.logical, element != "Ara_h_3.131") #A3.logical <- subset(A3.logical, element != "Ara_h_3.132") subjectf <- A3.logical$subject A3.percent.patients <- tapply(A3.logical$Epos, subjectf, sum) #explore relationship between epitopes and reactivity and create bins by epitope num and by combo score subjectf <- factor(v1.logical$subject) E.logical.sum <- tapply(v1.logical$Epos, subjectf, sum) G.logical.sum <- tapply(v1.logical$Gpos, subjectf, sum) patient.data <- read.csv(paste(current_dir, "/flinterman_data/score_key.csv", sep="")) v1.patient <- subset(patient.data, VISIT == "v1") v1.patient.sort <- v1.patient[ do.call(order, v1.patient) ,] #sort by subject E.epitope.num <- data.frame(subject=names(E.logical.sum), E.epitope=E.logical.sum) G.epitope.num <- data.frame(subject=names(G.logical.sum), G.epitope=G.logical.sum) v1.E.epitope.num.sort <- E.epitope.num[ do.call(order, E.epitope.num) ,] v1.G.epitope.num.sort <- G.epitope.num[ do.call(order, G.epitope.num) ,] v1.patient <- cbind(v1.patient.sort, E.epitope.num=v1.E.epitope.num.sort$E.epitope, G.epitope.num=v1.G.epitope.num.sort$G.epitope) quartz(); plot(v1.patient$E.epitope.num, v1.patient$COMBO, main="Reaction severity v. epitope diversity",ylab="Reaction score", xlab="# positive epitopes") q1.epitope <- subset(v1.patient, E.epitope.num <= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.25))) epitopeQ <- data.frame(Q=1, epitopes=q1.epitope$E.epitope.num, combo=q1.epitope$COMBO) q2.epitope <- subset(v1.patient, E.epitope.num <= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.5)) & E.epitope.num >= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.25))) epitopeQ <- rbind(epitopeQ, data.frame(Q=2, epitopes=q2.epitope$E.epitope.num, combo=q2.epitope$COMBO)) q3.epitope <- subset(v1.patient, E.epitope.num <= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.75)) & E.epitope.num >= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.5))) epitopeQ <- rbind(epitopeQ, data.frame(Q=3, epitopes=q3.epitope$E.epitope.num, combo=q3.epitope$COMBO)) q4.epitope <- subset(v1.patient, E.epitope.num >= as.numeric(quantile(v1.patient$E.epitope.num, prob = 0.75))) epitopeQ <- rbind(epitopeQ, data.frame(Q=4, epitopes=q4.epitope$E.epitope.num, combo=q4.epitope$COMBO)) quartz(); boxplot(epitopeQ$combo ~ epitopeQ$Q, ylim=c(0,15), main="Reaction score by epitope quartile", ylab="Combo reaction score") points(epitopeQ$combo ~ epitopeQ$Q, col=4, pch=19, cex=2) epitope_extremes <- subset(epitopeQ, Q == 1 | Q == 4) wilcox.test(subset(epitope_extremes,Q==1)$combo, subset(epitope_extremes,Q==4)$combo, alternative = "l")$p.value text(4,14,"*",cex=2) #comparison using sIGE q1.sIgE <- subset(v1.patient, sIgE <= as.numeric(quantile(v1.patient$sIgE, prob = 0.25))) sIgE_Q <- data.frame(Q=1, sIgE=q1.sIgE$sIgE, combo=q1.sIgE$COMBO) q2.sIgE <- subset(v1.patient, sIgE <= as.numeric(quantile(v1.patient$sIgE, prob = 0.5)) & sIgE >= as.numeric(quantile(v1.patient$sIgE, prob = 0.25))) sIgE_Q <- rbind(sIgE_Q, data.frame(Q=2, sIgE=q2.sIgE$sIgE, combo=q2.sIgE$COMBO)) q3.sIgE <- subset(v1.patient, sIgE <= as.numeric(quantile(v1.patient$sIgE, prob = 0.75)) & sIgE >= as.numeric(quantile(v1.patient$sIgE, prob = 0.5))) sIgE_Q <- rbind(sIgE_Q, data.frame(Q=3, sIgE=q3.sIgE$sIgE, combo=q3.sIgE$COMBO)) q4.sIgE <- subset(v1.patient, sIgE >= as.numeric(quantile(v1.patient$sIgE, prob = 0.75))) sIgE_Q <- rbind(sIgE_Q, data.frame(Q=4, sIgE=q4.sIgE$sIgE, combo=q4.sIgE$COMBO)) quartz(); boxplot(sIgE_Q$combo ~ sIgE_Q$Q, ylim=c(0,15), main="Reaction score by sIgE quartile", ylab="Combo reaction score") points(sIgE_Q$combo ~ sIgE_Q$Q, col=4, pch=19, cex=2) sIgE_extremes <- subset(sIgE_Q, Q == 1 | Q == 4) wilcox.test(subset(sIgE_extremes,Q==1)$combo, subset(sIgE_extremes,Q==4)$combo, alternative = "l")$p.value #quartile by combo score q1.combo <- subset(v1.patient, COMBO <= as.numeric(quantile(v1.patient$COMBO, prob = 0.25))) q1.near.EnG <- subset(v1.near.EnG, subject == as.character(q1.combo$subject[1])) q1.logical <- subset(v1.logical, subject == as.character(q1.combo$subject[1])) for (x in 2:length(q1.combo$subject)) { q1.near.EnG <- rbind(q1.near.EnG, subset(v1.near.EnG, subject == as.character(q1.combo$subject[x]))) q1.logical <- rbind(q1.logical, subset(v1.logical, subject == as.character(q1.combo$subject[x]))) } quartz(); plot(log(q1.near.EnG$E_SNR,2), log(q1.near.EnG$G_SNR,2), main="Q1 reaction severity", xlab="log IgE SNR", ylab="log IgG SNR", xlim=c(-1,6), ylim=c(-1,6)) double_pos <- sum(as.numeric(q1.logical$Epos & q1.logical$Gpos)) E.pos <- sum(as.numeric(q1.logical$Epos)) - double_pos G.pos <- sum(as.numeric(q1.logical$Gpos)) - double_pos abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3) text(5.5,-0.5, round(E.pos/length(q1.logical$Epos) * 100, 2), cex=1.5) text(5.5,5.5, round(double_pos/length(q1.logical$Epos) * 100, 2), cex=1.5) text(-0.5,5.5, round(G.pos/length(q1.logical$Epos) * 100, 2), cex=1.5) q2.combo <- subset(v1.patient, COMBO <= as.numeric(quantile(v1.patient$COMBO, prob = 0.5)) & COMBO >= as.numeric(quantile(v1.patient$COMBO, prob = 0.25))) q2.near.EnG <- subset(v1.near.EnG, subject == as.character(q2.combo$subject[1])) q2.logical <- subset(v1.logical, subject == as.character(q2.combo$subject[1])) for (x in 2:length(q2.combo$subject)) { q2.near.EnG <- rbind(q2.near.EnG, subset(v1.near.EnG, subject == as.character(q2.combo$subject[x]))) q2.logical <- rbind(q2.logical, subset(v1.logical, subject == as.character(q2.combo$subject[x]))) } quartz(); plot(log(q2.near.EnG$E_SNR,2), log(q2.near.EnG$G_SNR,2), main="Q2 reaction severity", xlab="log IgE SNR", ylab="log IgG SNR", xlim=c(-1,6), ylim=c(-1,6)) double_pos <- sum(as.numeric(q2.logical$Epos & q2.logical$Gpos)) E.pos <- sum(as.numeric(q2.logical$Epos)) - double_pos G.pos <- sum(as.numeric(q2.logical$Gpos)) - double_pos abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3) text(5.5,-0.5, round(E.pos/length(q2.logical$Epos) * 100, 2), cex=1.5) text(5.5,5.5, round(double_pos/length(q2.logical$Epos) * 100, 2), cex=1.5) text(-0.5,5.5, round(G.pos/length(q2.logical$Epos) * 100, 2), cex=1.5) q3.combo <- subset(v1.patient, COMBO <= as.numeric(quantile(v1.patient$COMBO, prob = 0.75)) & COMBO >= as.numeric(quantile(v1.patient$COMBO, prob = 0.5))) q3.near.EnG <- subset(v1.near.EnG, subject == as.character(q3.combo$subject[1])) q3.logical <- subset(v1.logical, subject == as.character(q3.combo$subject[1])) for (x in 2:length(q3.combo$subject)) { q3.near.EnG <- rbind(q3.near.EnG, subset(v1.near.EnG, subject == as.character(q3.combo$subject[x]))) q3.logical <- rbind(q3.logical, subset(v1.logical, subject == as.character(q3.combo$subject[x]))) } quartz(); plot(log(q3.near.EnG$E_SNR,2), log(q3.near.EnG$G_SNR,2), main="Q3 reaction severity", xlab="log IgE SNR", ylab="log IgG SNR", xlim=c(-1,6), ylim=c(-1,6)) double_pos <- sum(as.numeric(q3.logical$Epos & q3.logical$Gpos)) E.pos <- sum(as.numeric(q3.logical$Epos)) - double_pos G.pos <- sum(as.numeric(q3.logical$Gpos)) - double_pos abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3) text(5.5,-0.5, round(E.pos/length(q3.logical$Epos) * 100, 2), cex=1.5) text(5.5,5.5, round(double_pos/length(q3.logical$Epos) * 100, 2), cex=1.5) text(-0.5,5.5, round(G.pos/length(q3.logical$Epos) * 100, 2), cex=1.5) q4.combo <- subset(v1.patient, COMBO >= as.numeric(quantile(v1.patient$COMBO, prob = 0.75))) q4.near.EnG <- subset(v1.near.EnG, subject == as.character(q4.combo$subject[1])) q4.logical <- subset(v1.logical, subject == as.character(q3.combo$subject[1])) for (x in 2:length(q4.combo$subject)) { q4.near.EnG <- rbind(q4.near.EnG, subset(v1.near.EnG, subject == as.character(q4.combo$subject[x]))) q4.logical <- rbind(q4.logical, subset(v1.logical, subject == as.character(q4.combo$subject[x]))) } quartz(); plot(log(q4.near.EnG$E_SNR,2), log(q4.near.EnG$G_SNR,2), main="Q4 reaction severity", xlab="log IgE SNR", ylab="log IgG SNR",xlim=c(-1,6), ylim=c(-1,6)) double_pos <- sum(as.numeric(q4.logical$Epos & q4.logical$Gpos)) E.pos <- sum(as.numeric(q4.logical$Epos)) - double_pos G.pos <- sum(as.numeric(q4.logical$Gpos)) - double_pos abline(v=as.numeric(Ecutoff),h=as.numeric(Gcutoff), lty=3) text(5.5,-0.5, round(E.pos/length(q4.logical$Epos) * 100, 2), cex=1.5) text(5.5,5.5, round(double_pos/length(q4.logical$Epos) * 100, 2), cex=1.5) text(-0.5,5.5, round(G.pos/length(q4.logical$Epos) * 100, 2), cex=1.5) # V4 data BATCH(paste(current_dir, "/flinterman_data/VISIT_4", sep=""), exportname="flinterman_data/V4_DATA", skip_num=60) v4 <- read.csv("flinterman_data/V4_DATA") v4.clean <- subset(v4, E_CV<5); v4.clean <- subset(v4.clean, E_N == 4) #low CV and peptide elements only norm.clean <- subset(norm, E_CV<5); norm.clean <- subset(norm.clean, E_N == 4) #low CV and peptide elements only #now run loops to generate 'near neighbor' normalization v4.nearE <- array(); norm.nearE <- array(); v4.nearG <- array(); norm.nearG <- array() for (x in 1:length(v4.clean$IgE_SNR)){v4.nearE[x]<-median(c(v4.clean$IgE_SNR[x-1],v4.clean$IgE_SNR[x],v4.clean$IgE_SNR[x+1]))} v4.nearE[length(v4.nearE)] <- 1 # remove NA from last value for (x in 1:length(v4.clean$IgG_SNR)){v4.nearG[x]<-median(c(v4.clean$IgG_SNR[x-1],v4.clean$IgG_SNR[x],v4.clean$IgG_SNR[x+1]))} v4.nearG[length(v4.nearG)] <- 1 # remove NA from last value #create data frame v4.near.EnG <- data.frame(element = v4.clean$element, subject = v4.clean$subject, E_SNR = v4.nearE, G_SNR = v4.nearG) #summary plot showing mean signal and cutoff value using log transformed 'nearby' SNR quartz() elementf <- factor(v4.near.EnG$element) v4.Esum.mean.nearby <- tapply(v4.near.EnG$E_SNR, elementf, mean) v4.Gsum.mean.nearby <- tapply(v4.near.EnG$G_SNR, elementf, mean) plot(log(v4.Esum.mean.nearby, 2), type="n", ylim=c(-1,3), ylab="Log Signal Noise Ratio",main="nearby analysis low CV data") lines(log(v4.Esum.mean.nearby, 2), col=4) #display IgG4 below axis lines(-1*log(v4.Gsum.mean.nearby, 2), col=3) #create logical data.frame and plot for cutoff sum #visit 4 v4.E.logical <- array(); v4.G.logical <- array() for (x in 1:length(v4.near.EnG$E_SNR)) { v4.E.logical[x] <- as.numeric(log(v4.near.EnG$E_SNR[x],2)) > as.numeric(Ecutoff) v4.G.logical[x] <- as.numeric(log(v4.near.EnG$G_SNR[x],2)) > as.numeric(Gcutoff) } v4.logical <- data.frame(element = v4.near.EnG$element, subject = v4.near.EnG$subject, Epos = v4.E.logical, Gpos = v4.G.logical) elementf <- factor(v4.logical$element) v4.logical.Esum <- tapply(v4.logical$Epos, elementf, sum) quartz(); plot(v4.logical.Esum/length(levels(v4.logical$subject)), type="h", main="Peanut sensitized IgE", ylab="% positive") lines(v4.logical.Esum/length(levels(v4.logical$subject)), col=4) v4.logical.Gsum <- tapply(v4.logical$Gpos, elementf, sum) quartz(); plot(v4.logical.Gsum/length(levels(v4.logical$subject)), type="h", main="Peanut sensitized IgG", ylab="% positive") lines(v4.logical.Gsum/length(levels(v4.logical$subject)), col=3) #create matrices for contour plots v1v4E <- array(0, dim=c(21,20)) v1v4G <- array(0, dim=c(21,20)) for (x in 1:length(v1.logical.Esum)) { v1v4E[as.numeric(v1.logical.Esum[x])+1, as.numeric(v4.logical.Esum[x])+1] <- v1v4E[as.numeric(v1.logical.Esum[x])+1, as.numeric(v4.logical.Esum[x])+1] + 1 v1v4G[as.numeric(v1.logical.Gsum[x])+1, as.numeric(v4.logical.Gsum[x])+1] <- v1v4G[as.numeric(v1.logical.Gsum[x])+1, as.numeric(v4.logical.Gsum[x])+1] + 1 } quartz(); contour(x=seq(0, 21,len=nrow(v1v4E)), y=seq(0, 20, len=ncol(v1v4E)), nlevels=as.numeric(max(v1v4E)), v1v4E, lwd=2, drawlabels = FALSE, col=4, main="Stability of IgE epitopes over 20 months", ylab="visit 4 sum positive by epitope", xlab="visit 1 sum positive by epitope"); abline(0,1, col=4) quartz(); contour(x=seq(0, 21,len=nrow(v1v4G)), y=seq(0, 20, len=ncol(v1v4G)), nlevels=as.numeric(max(v1v4G)), v1v4G, lwd=2, drawlabels = FALSE, col=3, main="Stability of IgG epitopes over 20 months", ylab="visit 4 sum positive by epitope", xlab="visit 1 sum positive by epitope"); abline(0,1, col=3) chisq.test(v1.logical.Esum, v4.logical.Esum, simulate.p.value=TRUE); chisq.test(v1.logical.Esum, rnorm(419), simulate.p.value=TRUE) chisq.test(v1.logical.Gsum, v4.logical.Gsum, simulate.p.value=TRUE); chisq.test(v1.logical.Esum, rnorm(419), simulate.p.value=TRUE) # create heatmap image # first need to create column data.frame subjectf <- factor(v1.near.EnG$subject) elementf <- factor(v1.near.EnG$element) tempE <- by(v1.near.EnG$E_SNR, list(subjectf, elementf), sum) tempG <- by(v1.near.EnG$G_SNR, list(subjectf, elementf), sum) temp <- log(tempE/tempG, 2) v1.cluster <- array(as.numeric(temp), dim=c(24, 419)) row.names(v1.cluster) <- dimnames(temp)[[1]] names(v1.cluster) <- dimnames(temp)[[2]] quartz(); heatmap(as.matrix(v1.cluster), Colv=NA, cexCol = 0.1 + .1/log10(419), col=topo.colors(20), labCol = names(v1.cluster), scale="none") rm("temp", "tempE", "tempG") #end script