# Batch import and exploratory plots for the PM project basophil data.
#
# Part 1 defines PM_BATCH(), which gathers every "-Table" export found one
# level below a data directory into a single data frame and can write it to a
# CSV file.
# Part 2 reads that CSV back in, derives a `time` variable, normalises the
# stimulation labels, draws CD63 vs CD203 scatter plots and saves the result.

# Part 1: batch import ----

# Read one "-Table" export and return it with the project column names.
#
# filename: path of the tab-separated export (read as MacRoman text).
# corename: experiment identifier stored in the `experiment` column.
# Returns a data frame with 60 columns, or NULL (with a warning) when the file
# has no data rows left after dropping the two trailing rows.
.read_pm_table <- function(filename, corename) {
  # Output column name -> column position in the exported table.
  # The positions are deliberately not monotonic: inside the Q1/Q2/Q4 groups
  # each *_MFI value is taken from the column that follows its *_CV value.
  # NOTE(review): the original script also derived a visit number from
  # character 17 of the file name but never used it; `visit_num` is filled
  # from table column 59 -- confirm which source is intended.
  column_map <- c(
    stim = 1, baso_count = 2, baso_freq = 3,
    baso_CD69_MFI = 4, baso_CD69_CV = 5, baso_CD69_pctMAD = 6,
    baso_CD203_MFI = 7, baso_CD203_CV = 8, baso_CD203_MAD = 9,
    baso_CD123_MFI = 10, baso_CD123_CV = 11, baso_CD123_MAD = 12,
    baso_CD63_MFI = 13, baso_CD63_CV = 14, baso_CD63_MAD = 15,
    Q1 = 16,
    Q1_CD69_MFI = 18, Q1_CD69_CV = 17, Q1_CD69_MAD = 19,
    Q1_CD63_MFI = 21, Q1_CD63_CV = 20, Q1_CD63_MAD = 22,
    Q1_CD203_MFI = 24, Q1_CD203_CV = 23, Q1_CD203_MAD = 25,
    Q1_CD123_MFI = 27, Q1_CD123_CV = 26, Q1_CD123_MAD = 28,
    Q1_DR_MFI = 29,
    Q2 = 30,
    Q2_CD69_MFI = 32, Q2_CD69_CV = 31, Q2_CD69_MAD = 33,
    Q2_CD63_MFI = 35, Q2_CD63_CV = 34, Q2_CD63_MAD = 36,
    Q2_CD203_MFI = 38, Q2_CD203_CV = 37, Q2_CD203_MAD = 39,
    Q2_CD123_MFI = 41, Q2_CD123_CV = 40, Q2_CD123_MAD = 42,
    Q2_DR_MFI = 43,
    Q3 = 44,
    Q4 = 45,
    Q4_CD69_MFI = 47, Q4_CD69_CV = 46, Q4_CD69_MAD = 48,
    Q4_CD63_MFI = 50, Q4_CD63_CV = 49, Q4_CD63_MAD = 51,
    Q4_CD203_MFI = 53, Q4_CD203_CV = 52, Q4_CD203_MAD = 54,
    Q4_CD123_MFI = 56, Q4_CD123_CV = 55, Q4_CD123_MAD = 57,
    Q4_DR_MFI = 58,
    visit_num = 59
  )

  read_export <- function(nrows) {
    read.table(filename, skip = 1, sep = "\t", check.names = FALSE,
               fill = TRUE, strip.white = TRUE, nrows = nrows,
               fileEncoding = "macroman")
  }

  # The file is read twice on purpose: the first pass only counts the rows so
  # that the second pass can leave off the last two rows *before* column types
  # are inferred.
  n_rows <- nrow(read_export(-1))
  if (n_rows <= 2) {
    warning("skipping ", filename, ": no data rows besides the two trailing rows",
            call. = FALSE)
    return(NULL)
  }
  tab <- read_export(n_rows - 2)

  if (ncol(tab) < max(column_map)) {
    stop("expected at least ", max(column_map), " columns in ", filename,
         " but found ", ncol(tab), call. = FALSE)
  }

  # Remove additional text from the stim field: keep characters 4-18 and
  # replace spaces with underscores.
  tab[, 1] <- gsub(" ", "_", substring(tab[, 1], 4, 18))

  out <- tab[, unname(column_map)]
  names(out) <- names(column_map)
  data.frame(experiment = corename, out, stringsAsFactors = FALSE)
}

# Collect all "-Table" exports below `directory` into one data frame.
#
# directory:  folder whose entries are scanned; every file inside those
#             entries whose name ends in "-Table" is imported.
# exportname: file name (relative to the working directory) for the CSV.
# export:     if TRUE, write the combined table to `exportname`.
# Returns the combined data frame invisibly. Stops with an error when no
# table could be imported or when `export` is TRUE without an `exportname`.
PM_BATCH <- function(directory, exportname = "", export = FALSE) {
  if (export && !nzchar(exportname)) {
    stop("'exportname' must be given when export = TRUE", call. = FALSE)
  }

  # list.files() instead of shelling out to `ls`, so paths containing spaces
  # or shell metacharacters are handled correctly.
  folders <- file.path(directory, list.files(directory))

  cat("processing . . .\n")
  tables <- list()
  for (folder in folders) {
    for (shortname in list.files(folder)) {
      # Only files whose name ends in "-Table" are imported.
      if (!grepl("-Table$", shortname)) {
        next
      }
      filename <- file.path(folder, shortname)
      # Characters 10-14 of the file name identify the experiment.
      corename <- substring(shortname, 10, 14)

      piece <- .read_pm_table(filename, corename)
      cat(filename, "\n")
      if (!is.null(piece)) {
        tables[[length(tables) + 1L]] <- piece
      }
    }
  }
  cat("total number of folders:", length(folders), "\n")

  if (length(tables) == 0L) {
    stop("no '-Table' files could be imported from ", directory, call. = FALSE)
  }
  dat <- do.call(rbind, tables)

  # Remove the funny character from flowjo files: the bullet becomes "NA"
  # before the columns are coerced to numeric. Columns 4-59 run from
  # baso_freq to Q4_DR_MFI; baso_count (3) and visit_num (60) are left as
  # read.
  numeric_cols <- 4:59
  dat[numeric_cols] <- lapply(dat[numeric_cols], function(column) {
    as.numeric(gsub("•", "NA", column))
  })

  if (export) {
    out_path <- file.path(getwd(), exportname)
    write.csv(dat, out_path, fileEncoding = "macroman")
    cat("writing file", out_path, "\n")
  }

  invisible(dat)
}

PM_BATCH("/Users/Shared/ShreffLab/Projects/PM_baso/data/outbox", "PM.csv", TRUE)

# Part 2: analysis of the exported table ----

# as.is = TRUE prevents read.csv from creating factors.
pm <- read.csv("PM.csv", as.is = TRUE)

# Create a separate time variable: 'baseline', 'post_t1', 'post_t2'.
# The order matters: a later match overwrites an earlier one, so a label
# containing both "post" and "_4" ends up as "post_t2".
pm$time <- NA_character_
pm$time[grep("pre", pm$stim, ignore.case = TRUE)] <- "baseline"
pm$time[grep("post", pm$stim, ignore.case = TRUE)] <- "post_t1"
pm$time[grep("_4", pm$stim, ignore.case = TRUE)] <- "post_t2"

# Collapse the stimulation labels to their canonical names.
pm$stim[grep("RPMI", pm$stim, ignore.case = TRUE)] <- "RPMI"
pm$stim[grep("IgE", pm$stim, ignore.case = TRUE)] <- "aIgE"
pm$stim[grep("fMLP", pm$stim, ignore.case = TRUE)] <- "fMLP"

# Convert to factors.
pm$stim <- as.factor(pm$stim)
pm$experiment <- as.factor(pm$experiment)
pm$time <- as.factor(pm$time)

# Some plots ----

# Add one stimulation condition to the current CD63 vs CD203 plot.
#
# Draws filled grey points for the 'post_t2' rows, open points in `point_col`
# for all rows of the condition, and one line per experiment (coloured by the
# experiment's level index) connecting that experiment's rows.
add_stim_layer <- function(pm, stim_name, point_col) {
  x <- log10(pm$baso_CD63_MFI)
  y <- log10(pm$baso_CD203_MFI)

  is_stim <- !is.na(pm$stim) & pm$stim == stim_name
  is_late <- is_stim & !is.na(pm$time) & pm$time == "post_t2"

  points(x[is_late], y[is_late], col = "grey", pch = 19)
  points(x[is_stim], y[is_stim], col = point_col)

  # Connect time points within each experiment.
  experiments <- levels(pm$experiment)
  for (k in seq_along(experiments)) {
    in_experiment <- is_stim & !is.na(pm$experiment) &
      pm$experiment == experiments[k]
    lines(x[in_experiment], y[in_experiment], col = k)
  }
  invisible(NULL)
}

# Empty frame sized to all data; the layers are added per condition.
plot(log10(pm$baso_CD63_MFI), log10(pm$baso_CD203_MFI), type = "n",
     xlab = "log10 baso CD63 MFI", ylab = "log10 baso CD203 MFI")
add_stim_layer(pm, "RPMI", "green")
add_stim_layer(pm, "aIgE", "red")

write.csv(pm, "pm_data.csv")