# Pseudocode: # this function takes as input a single well (can be run with lapply from another call to do a list of wells...) # performs statistics on it, (mean, sd), and adds the series_name. # returns growth series data get_growth_series_for_well <- function(in_well, in_series, in_plate, read_con) { sql_query_list = get_sql_statement_for_well(in_well, in_plate, in_series) sql_parse_series_results = parse_sql_query_list(sql_query_list, read_con, in_series) # well list returns "A1, B1, C1..." etc. # Function takes a single well "A1" or can be run with lapply, as a list of wells "A1" "A2" .. # it creates an SQL statement for each well and runs it... # let's try returning the data from the function each time... Let's try that too. # I'm not sure which is best, so let's try it by just running the sql query, for two of them, then trying to add the series name to the data.frame # option is to run the SQL statement in this function, and to pass back the individual lots of data to the calling function... # or to keep concatenating the SQL results in this function and pass back the whole lot... # well, we want a data format like this: # read_time plate_number od col row temp day series # 1 2010-02-15 7 0.0695247 1 A 24.6 0 p7s1 # 2 2010-02-15 7 0.0665247 1 B 24.6 0 p7s1 print(sql_parse_series_results) return (sql_parse_series_results) } #end this function # returns the sql statement for getting a single well out, all dates. get_sql_statement_for_well <- function(in_well_for_sql, in_plate, in_series) { # these two lines get just the row and col out in_well_for_sql_row = substr(in_well_for_sql,1,1) in_well_for_sql_col_nchars = nchar(in_well_for_sql) in_well_for_sql_col = substr(in_well_for_sql,2,in_well_for_sql_col_nchars) SQL_statement_for_well = paste ("select read_time, plate_number, od, col, row, temp from 96_well.plate_data WHERE plate_number = ", in_plate, " AND row = '", in_well_for_sql_row, "' ", "AND col = ", in_well_for_sql_col, " ORDER BY read_time", sep="") return(SQL_statement_for_well) } parse_sql_query_list <- function(SQL_query_list_in, read_con, in_series) { print("debug1") SQL_query_results = dbGetQuery(read_con, SQL_query_list_in) #this gets back a dataframe with everything # Now convert print(SQL_query_list_in) print(read_con) print(in_series) print(SQL_query_results) print("debug2") SQL_query_results$read_time = as.Date(SQL_query_results$read_time) # convert read_time to date object SQL_query_results$day = SQL_query_results$read_time - SQL_query_results$read_time[1] #calculate days since first read [1], so you can plot days not dates SQL_query_results$day = as.integer(SQL_query_results$day) # You need to concatenate the od, where the date is the same, as the average, # and create a new col for the standard deviation # use ddply with your data.frame and split/summarize on read time # you could later here add a column for the number of replicates... SQL_query_results = ddply(SQL_query_results, "read_time", summarise, day = mean(day), od = mean(od), stdev = sd(od), temp = mean(temp), row = unique(row), col = unique(col), plate_number = unique(plate_number)) # now add the new column for the series_id label # find how many rows in this series, number_of_rows = length(row.names(SQL_query_results)) #in_series = "p12s1" # for temp series_list = c(rep(in_series, number_of_rows)) SQL_query_results = cbind(SQL_query_results, "series_id" = series_list) return (SQL_query_results) } # end parse sql function ## now do the stats on this # you need to take each of the OD values for a particular date, run them through a remove_outliers() function. # then calculate a new mean OD value and sd based on the removed outliers... # you also want to note in a new column, how many outliers were removed. and how many values are in the final statistic. # So,let's practice with this set of OD values: # test_ods = c(0.2, 0.5, 0.1, 0.3) # let's pause here, do the variance test after 20 reads on the two plates 22 and 23? from the other day, and then come back again to plot each of these series..... # you are almost done here.. # Just run a script to get out each well and each plate # Then figure out a nice way to store that data (ask Stackoverflow) # Then figure out the best way to plot these, and feed them into the model fitting program # it would help to first ask "What plots do I want to plot?, and what format does the data need to be in to enable me to do this" and then ask "What data do I want to fit using the model fitting package" and what format does the data need to be in for me to do this...?" # output to file, for import to excel, for example... #write.table(individual_well_series_od, file = "series_output_17_a_1", sep="\t") # What's the big plan here? # you have a series e.g. p3s1 A1:H1 or p6s1 A1:D1. # or pH series such as p18s1 = A1, A2, B1, B2 # So... you now think it is best to calculate each individual well at a time. Do the stats for outliers, then plot the growth curves and fit the model for each well. then take the average of the fitted parameters. # But you also need to plot each series (all 120 of them) # in order to check that they look ok. you also need to plot the fitted curve to verify it... # I think you need to go through each well # find outliers for each reading and remove them. # then print out a final series of growth points... # what is the best way to store this data? # sometimes you'll want it in long form, other times not... # maybe use melt and cast? # then feed this into the model fitting program. # also think about then feeding in a temperature series into the model fitting program. # In this case... Take the # Next, you need to do the outlier statistics on each read. # Then you need to think about what you want to plot exactly... # and how to store each series you have... ## and save the series as before. # you will then have exactly the same as you have now, expect you retrieved the series in a different way, but the output object will be the same...  and you can plot these based on the series. # but will this later allow you to create a number of new series based on individual wells rather than groups of wells? # yes it will, you just need a new naming convention for the series...  e.g. p6s1.1  p6s1.2     p6s1.3     p6s1.4   etc.    p6s1 could be the average of the series, p6s1.1 could mean each specific data point - perhaps?  come back to this if you need to, not difficult to change... # Then you can also plot based on this... #validate.input ("Z4") #validate.and.catch.error ("Z4")