# Analysis of PM project basophil data.
# NOTE: removes every object returned by ls() from the current environment so
# the script starts from a clean workspace.
rm(list=ls())
# Batch-import FlowJo "-Table" exports and stack them into one data frame.
#
# Every entry of `directory` is treated as a folder. Each file in such a
# folder whose name ends in "-Table" is read as a tab-separated table (first
# line skipped, last two rows left off), its columns are renamed according to
# the fixed layout below, and the per-file tables are stacked row-wise.
#
# Arguments:
#   directory   folder whose entries are the folders holding the table files.
#   exportname  name of the CSV written to the working directory when
#               `export` is TRUE.
#   export      if TRUE, write the cleaned table to `exportname`.
#   encoding    text encoding used to read the tables and to write the CSV;
#               defaults to "macroman", the value that used to be hard-coded.
#
# Returns (invisibly) the stacked table with columns 4:59 converted to
# numeric.
#
# Side effect kept for backward compatibility: the stacked table *before* the
# numeric conversion is assigned to `dat` with `<<-`, exactly as earlier
# versions of this function did.
#
# Stops with an error when no "-Table" file is found; warns and skips a table
# that has no rows left once the last two are dropped.
PM_BATCH <- function(directory, exportname = "", export = FALSE,
                     encoding = "macroman") {
  # Source column of the exported table feeding each output measure. The
  # MFI/CV order inside the Q1/Q2/Q4 groups is deliberately kept as it was.
  # NOTE(review): visit_num comes from table column 59; character 17 of the
  # file name looks like a visit number too - confirm which one is intended.
  table_columns <- c(
    baso_count = 2, baso_freq = 3,
    baso_CD69_MFI = 4, baso_CD69_CV = 5, baso_CD69_pctMAD = 6,
    baso_CD203_MFI = 7, baso_CD203_CV = 8, baso_CD203_MAD = 9,
    baso_CD123_MFI = 10, baso_CD123_CV = 11, baso_CD123_MAD = 12,
    baso_CD63_MFI = 13, baso_CD63_CV = 14, baso_CD63_MAD = 15,
    Q1 = 16,
    Q1_CD69_MFI = 18, Q1_CD69_CV = 17, Q1_CD69_MAD = 19,
    Q1_CD63_MFI = 21, Q1_CD63_CV = 20, Q1_CD63_MAD = 22,
    Q1_CD203_MFI = 24, Q1_CD203_CV = 23, Q1_CD203_MAD = 25,
    Q1_CD123_MFI = 27, Q1_CD123_CV = 26, Q1_CD123_MAD = 28,
    Q1_DR_MFI = 29,
    Q2 = 30,
    Q2_CD69_MFI = 32, Q2_CD69_CV = 31, Q2_CD69_MAD = 33,
    Q2_CD63_MFI = 35, Q2_CD63_CV = 34, Q2_CD63_MAD = 36,
    Q2_CD203_MFI = 38, Q2_CD203_CV = 37, Q2_CD203_MAD = 39,
    Q2_CD123_MFI = 41, Q2_CD123_CV = 40, Q2_CD123_MAD = 42,
    Q2_DR_MFI = 43,
    Q3 = 44,
    Q4 = 45,
    Q4_CD69_MFI = 47, Q4_CD69_CV = 46, Q4_CD69_MAD = 48,
    Q4_CD63_MFI = 50, Q4_CD63_CV = 49, Q4_CD63_MAD = 51,
    Q4_CD203_MFI = 53, Q4_CD203_CV = 52, Q4_CD203_MAD = 54,
    Q4_CD123_MFI = 56, Q4_CD123_CV = 55, Q4_CD123_MAD = 57,
    Q4_DR_MFI = 58,
    visit_num = 59
  )

  # Read one exported table without its last two rows; NULL when nothing is
  # left. The file is read twice on purpose: the second pass infers column
  # types from the kept rows only.
  read_export <- function(path) {
    read_rows <- function(...) {
      read.table(path, skip = 1, sep = "\t", check.names = FALSE, fill = TRUE,
                 strip.white = TRUE, fileEncoding = encoding, ...)
    }
    total_rows <- nrow(read_rows())
    if (total_rows <= 2) {
      return(NULL)
    }
    read_rows(nrows = total_rows - 2)
  }

  cat("processing . . .\n")

  # list.files() instead of shelling out to `ls`: no leaked pipe connections,
  # and paths containing spaces work.
  paths <- file.path(directory, list.files(directory))
  tables <- list()

  for (folder in paths) {
    for (shortname in list.files(folder)) {
      # Literal suffix test; the earlier grep() call used the file-name tail
      # as a regular expression, so e.g. "x.Table" was accepted as well.
      if (!endsWith(shortname, "-Table")) {
        next
      }
      filename <- file.path(folder, shortname)
      corename <- substring(shortname, 10, 14)

      TABLE <- read_export(filename)
      if (is.null(TABLE)) {
        warning("skipping ", filename, ": no rows left after dropping the ",
                "last two", call. = FALSE)
        next
      }
      cat(paste(filename, "\n"))

      # Keep characters 4-18 of the stim label and replace spaces with "_".
      stim <- gsub(" ", "_", substring(TABLE[, 1], 4, 18))
      measures <- lapply(table_columns, function(k) TABLE[, k])
      tables[[length(tables) + 1]] <- do.call(
        data.frame,
        c(list(experiment = corename, stim = stim), measures)
      )
    }
  }
  cat(paste("total number of folders:", paste(length(paths), "\n")))

  if (length(tables) == 0) {
    stop("no '-Table' files found under ", directory, call. = FALSE)
  }

  # Stack once instead of rbind-ing inside the loop.
  combined <- do.call(rbind, tables)
  # Backward compatibility: earlier versions published this table as `dat`.
  dat <<- combined

  # remove funny character from flowjo files: cells holding the bullet become
  # NA and columns 4:59 become numeric
  numeric_cols <- 4:59
  cleaned <- combined
  cleaned[numeric_cols] <- lapply(
    cleaned[numeric_cols],
    function(column) as.numeric(gsub("•", "NA", column))
  )

  if (export) {
    exportpath <- paste0(getwd(), "/")
    write.csv(cleaned, file = paste0(exportpath, exportname),
              fileEncoding = encoding)
    cat(paste("writing file", paste(exportpath, paste(exportname, "\n"), sep = "")))
  }

  invisible(cleaned)
}

# Run the batch import on the shared outbox folder and export the combined
# table as PM.csv in the working directory.
PM_BATCH(
  directory = "/Users/Shared/ShreffLab/Projects/PM_baso/data/outbox",
  exportname = "PM.csv",
  export = TRUE
)

# Second stage, run after the batch step has written PM.csv: start from an
# empty workspace and reload the exported table.
rm(list = ls())
# as.is = TRUE keeps text columns as character instead of factor
pm <- read.csv("PM.csv", as.is = TRUE)

# Derive a separate `time` variable from the stim label. Rules are applied in
# order and matched case-insensitively, so a later rule overrides an earlier
# one on rows matching both ("_4" wins over "post").
pm <- data.frame(pm, time = NA)
time_rules <- c("pre" = "baseline", "post" = "post_t1", "_4" = "post_t2")
for (rule in names(time_rules)) {
  pm$time[grep(rule, pm$stim, ignore.case = TRUE)] <- time_rules[[rule]]
}

# Collapse the stim labels to the bare stimulus name. This runs after `time`
# has been derived because it discards the time information in the label.
stim_rules <- c("RPMI" = "RPMI", "IgE" = "aIgE", "fMLP" = "fMLP")
for (rule in names(stim_rules)) {
  pm$stim[grep(rule, pm$stim, ignore.case = TRUE)] <- stim_rules[[rule]]
}

# convert to factors
for (column in c("stim", "experiment", "time")) {
  pm[[column]] <- as.factor(pm[[column]])
}

	
# SOME PLOTS
# Log10 CD63 MFI against log10 CD203 MFI for the basophil gate. An empty frame
# is set up from all rows; then, for each stimulus in turn, the post_t2 points
# are drawn as filled grey dots, all points of that stimulus as open circles
# in its own colour, and the points of each experiment are joined by a line.

plot(log(pm$baso_CD63_MFI, 10), log(pm$baso_CD203_MFI, 10), type = "n")

stim_colours <- c(RPMI = "green", aIgE = "red")
for (stim_label in names(stim_colours)) {
  late_rows <- subset(pm, stim == stim_label & time == "post_t2")
  points(log(late_rows$baso_CD63_MFI, 10), log(late_rows$baso_CD203_MFI, 10),
         col = "grey", pch = 19)

  stim_rows <- subset(pm, stim == stim_label)
  points(log(stim_rows$baso_CD63_MFI, 10), log(stim_rows$baso_CD203_MFI, 10),
         col = stim_colours[[stim_label]])

  # connect time points within each experiment; the line colour is the index
  # of the experiment level
  experiment_levels <- levels(pm$experiment)
  for (k in seq_along(experiment_levels)) {
    in_experiment <- pm$experiment == experiment_levels[k]
    track <- subset(pm[in_experiment, ], stim == stim_label)
    lines(log(track$baso_CD63_MFI, 10), log(track$baso_CD203_MFI, 10), col = k)
  }
}


# Save the relabelled table (with the derived `time` column) to the working
# directory.
write.csv(pm, file = "pm_data.csv")