# This is the code used to generate the data analysis and plots # Algal biomass constituent analysis: method uncertainties and investigation of the underlying measuring chemistries, # by Lieve Laurens (March-August 2011) # National Renewable Energy Laboratory ### based on previously published analysis code (Ed Wolfrum 2009): # pubs.acs.org/doi/suppl/10.1021/jf100807b/suppl_file/jf100807b_si_003.txt # pubs.acs.org/doi/suppl/10.1021/jf100807b/suppl_file/jf100807b_si_004.txt setwd("~/Documents/Algal Biomass Analysis") RoundRobin <- read.csv(file = "RoundRobin.csv", header = TRUE, stringsAsFactors = FALSE) # function to calculate the pooled SD SDp<-function(theColumn,theFactor){ diffs <- unlist(sapply(split(theColumn,theFactor),function(x) x-mean(x,na.rm=T))) df <- unlist(sapply(split(theColumn, theFactor),function(x) length(x) - 1)) SDp<-sqrt(sum(diffs^2,na.rm=T)/sum(df,na.rm=T)) SDp } data <- c("Moisture", "Ash", "Starch_M", "Starch_S", "Carbs", "Lipids_S", "Lipids_B", "Protein_B", "Protein_N") ############################### Table 1 ############################ # mean holds the simple mean of each variable # sd holds the "simple" standard deviation (SD) # rsd holds the relative standard deviation # SDp holds the pooled SD, by institution, researcher and day # N holds the number of non-NA values in the SD calculation mean <- mean(RoundRobin[,data],na.rm=T) sd <- sd(RoundRobin[,data],na.rm=T) rsd <- sd/mean * 100 # calculate the relative standard deviation per institution mean.1 <- mean(RoundRobin[which(RoundRobin$Institution == "Lab.1"),data],na.rm=T) sd.1 <- sd(RoundRobin[which(RoundRobin$Institution == "Lab.1"),data],na.rm=T) rsd.1 <- sd.1/mean.1 * 100 mean.2 <- mean(RoundRobin[which(RoundRobin$Institution == "Lab.2"),data],na.rm=T) sd.2 <- sd(RoundRobin[which(RoundRobin$Institution == "Lab.2"),data],na.rm=T) rsd.2 <- sd.2/mean.2 * 100 mean.3 <- mean(RoundRobin[which(RoundRobin$Institution == "Lab.3"),data],na.rm=T) sd.3 <- sd(RoundRobin[which(RoundRobin$Institution == "Lab.3"),data],na.rm=T) rsd.3 <- sd.3/mean.3 * 100 # calculate the pooled standard deviation per institution, researcher and day sdp.Inst <- apply(RoundRobin[,data],2, SDp, as.factor(RoundRobin$Institution)) sdp.Res <- apply(RoundRobin[,data],2, SDp, as.factor(RoundRobin$Researcher)) sdp.Day <- apply(RoundRobin[,data],2, SDp, as.factor(RoundRobin$Day)) N <- apply(RoundRobin[,data],2,function(x) sum(!is.na(x))) table <-data.frame(mean,sd,rsd, sdp.Inst, sdp.Res, sdp.Day, N) table <-t(table) table[c(1:6),] <- round(table[c(1:6),],2) table write.table(table,file="~/Documents/Algal biomass analysis/R/RoundRobinSummary.csv",sep=",") # the censored data # Identify Tukey outliers: # this takes a data vector and returns a copy of it # with the value of any Tukey outlier replaced with NA # it doesn't modify the function argument directly (e.g., it is call-by-value) tukey<-function(theDataCol){ j<- fivenum(theDataCol,na.rm=T) i<- IQR(theDataCol,na.rm=T) x<- which( (theDataColj[4]+1.5*i)) is.na(theDataCol)<-x theDataCol } #need to make sure the dataframe to be censored is also in matrix form, not a concatenated list # RoundRobin.C reflects the censored dataset RoundRobin.C <- RoundRobin subset <- as.matrix(RoundRobin[,data]) subset.C <- subset cols <- c(1:9) for (i in cols) subset.C[,i] <- tukey(subset[,i]) RoundRobin.C[,data] <- subset.C RoundRobin.C <- data.frame(RoundRobin.C) mean.C <- mean(RoundRobin.C[,data],na.rm=T) sd.C <- sd(RoundRobin.C[,data],na.rm=T) rsd.C <- sd.C/mean.C * 100 mean.C.1 <- mean(RoundRobin.C[which(RoundRobin.C$Institution == "Lab.1"),data],na.rm=T) sd.C.1 <- sd(RoundRobin.C[which(RoundRobin.C$Institution == "Lab.1"),data],na.rm=T) rsd.C.1 <- sd.C.1/mean.C.1 * 100 mean.C.2 <- mean(RoundRobin.C[which(RoundRobin.C$Institution == "Lab.2"),data],na.rm=T) sd.C.2 <- sd(RoundRobin.C[which(RoundRobin.C$Institution == "Lab.2"),data],na.rm=T) rsd.C.2 <- sd.C.2/mean.C.2 * 100 mean.C.3 <- mean(RoundRobin.C[which(RoundRobin.C$Institution == "Lab.3"),data],na.rm=T) sd.C.3 <- sd(RoundRobin.C[which(RoundRobin.C$Institution == "Lab.3"),data],na.rm=T) rsd.C.3 <- sd.C.3/mean.C.3 * 100 sdp.C.Inst <- apply(RoundRobin.C[,data],2, SDp, as.factor(RoundRobin$Institution)) sdp.C.Res <- apply(RoundRobin.C[,data],2, SDp, as.factor(RoundRobin$Researcher)) sdp.C.Day <- apply(RoundRobin.C[,data],2, SDp, as.factor(RoundRobin$Day)) N.C <-apply(RoundRobin.C[,data],2,function(x) sum(!is.na(x))) table <-data.frame(mean.C,sd.C, sdp.C.Inst, sdp.C.Res, sdp.C.Day,N.C) table <-t(table) table[c(1:6),] <-round(table[c(1:6),],2) table write.table(table,file="~/Documents/Algal biomass analysis/R/RoundRobinSummaryC.csv",sep=",") ############################### New Figure 1 ############################ msdBox<-function(theDataCol,theFactor, y.axis = "asdh", ylim = c(5,22), reference = 9.78, title = title){ theM2<-mean(theDataCol, na.rm=T) theM<-median(theDataCol,na.rm=T) theS<-fivenum(theDataCol, na.rm=T) j<-boxplot(theDataCol ~ theFactor, main = title,outpch = NA, yaxt = "n", ylim = ylim, cex.axis = 1.2, cex.lab = 1.5, par(adj = 0.5, mar = c(5.1, 6.1, 4.1, 2.1))) x<-c(0,length(j$names)+1,length(j$names)+1,0) y<-c(theS[2],theS[2],theS[4],theS[4]) polygon(x,y,density=NA, col="gray") abline(h=theM,lwd=3) abline(h = reference, lty = "dashed", col = "red", lwd = 3) boxplot(theDataCol ~ theFactor, show.names=F, ylab = y.axis,col="grey95",add=T, outpch=NA) } par(mfrow=c(2,4),mar=c(3,4,2,2)) msdBox(RoundRobin.C$Moisture, RoundRobin.C$Institution,y.axis = "Moisture", title = "Moisture", reference = "", ylim = c(0,10) ) msdBox(RoundRobin.C$Ash, RoundRobin.C$Institution,y.axis = "Ash", title = "Ash", reference = "", ylim = c(0,10)) msdBox(RoundRobin.C$Lipids_S, RoundRobin.C$Institution,y.axis = "Lipids_S", title = "Lipids_S", reference = 9.78, ylim = c(5,22)) msdBox(RoundRobin.C$Lipids_B, RoundRobin.C$Institution,y.axis = "Lipids_B",title = "Lipids_B", reference = 9.78, ylim = c(5,22)) msdBox(RoundRobin.C$Carbs, RoundRobin.C$Institution,y.axis = "Carbs", title = "Carbs",reference = 17.94, ylim = c(5,35)) msdBox(RoundRobin.C$Starch, RoundRobin.C$Institution,y.axis = "Starch", title = "Starch", reference = 17.94, ylim = c(5,35)) msdBox(RoundRobin.C$Protein_B, RoundRobin.C$Institution,y.axis = "Protein_B",title = "Protein_B", reference = 38.06, ylim = c(10,70)) msdBox(RoundRobin.C$Protein_N, RoundRobin.C$Institution,y.axis = "Protein_N", title = "Protein_N", reference = 38.06, ylim = c(10,70)) ############################### Table 2 ############################ # Analysis of variance within and between the researchers - list significance for each variable #on censored dataset j<-aov(Starch_M~factor(Institution)+factor(Researcher)+factor(Day),data=RoundRobin.C, na.action = na.omit);summary(j) j<-aov(Starch_S~factor(Institution)+factor(Researcher)+factor(Day),data=RoundRobin.C, na.action = na.omit);summary(j) j<-aov(Protein_N~factor(Institution)+factor(Researcher)+factor(Day),data=RoundRobin.C, na.action = na.omit);summary(j) j<-aov(Protein_B~factor(Institution)+factor(Researcher)+factor(Day),data=RoundRobin.C, na.action = na.omit);summary(j) j<-aov(Carbs~factor(Institution)+factor(Researcher)+factor(Day),data=RoundRobin.C, na.action = na.omit);summary(j) j<-aov(Ash~factor(Institution)+factor(Researcher)+factor(Day),data=RoundRobin.C, na.action = na.omit);summary(j) j<-aov(Moisture~factor(Institution)+factor(Researcher)+factor(Day),data=RoundRobin.C, na.action = na.omit);summary(j) j<-aov(Lipids_B~factor(Institution)+factor(Researcher)+factor(Day),data=RoundRobin.C, na.action = na.omit);summary(j) j<-aov(Lipids_S~factor(Institution)+factor(Researcher)+factor(Day),data=RoundRobin.C, na.action = na.omit);summary(j) ############################### END ############################