############################################################# # TDStats v2014.1 # # Written by CN Stephan, 2012 # ############################################################# ## Instructions: ## Facial soft tissue depth (FSTD) file must be of the type ".cvs" ## The FSTD file must contain data of at least one study with n>10. ## FSTD file must be in format similar to C-Table: ## -Data should be organized by column under landmark headers. ## -The first landmark should populate column 11 of the table. ## -The first 10 columns should contain the following descriptive information, in sequence: ## Study, Year, Published, Study Code, Method, Sample, Population, Region, Age, Sex. ## For exemplar formatting, see the C-Table @ CRANIOFACIALidentification.com ## Import libraries required.packages <- c("tcltk", "gtools", "Hmisc", "PerformanceAnalytics", "sm", "vioplot", "xts", "zoo") new.packages <- required.packages[!(required.packages %in% installed.packages()[,"Package"])] if(length(new.packages)) install.packages(new.packages) library(tcltk) ## for GUI library(Hmisc) ## for BOXPLOTS library(gtools) ## for additional BOXPLOT DATA / VIOLIN PLOT library(PerformanceAnalytics) ## for additional BOXPLOT DATA library(zoo) ## for PerformanceAnalytics library(xts) ## for PerformanceAnalytics library(vioplot) ## for VIOLIN PLOT library(sm) ## for SEX, AGE and FINAL BOX plots ## Display Logo Window R.base.dir <- system.file() setwd(paste(R.base.dir,"/../../doc/html",sep="")) image <- as.numeric(file.access(paste(R.base.dir,"/../../doc/html/TDStats_logo.gif",sep=""), mode=0)) if (image==-1) { tkmessageBox(title="TDStats", message="You are missing the TDStats logo. \n\nTo use TDStats you must save the TDStats logo to the following path: \n\nC:/Program Files/R/R-[version]/doc/html. \n\n See CRANIOFACIALidentification.com for more information.",icon="warning") q() } if (image==0) { tt <- tktoplevel() tkwm.title(tt, "TDStats") tkwm.geometry(tt,"600x280+30+30") image1<-tclVar() tcl("image","create","photo",image1,file="TDStats_logo.gif") imgAsLabel<-tklabel(tt,image=image1,bg="white") tkpack(imgAsLabel) tkconfigure(tt,cursor="watch") Sys.sleep (10) tkconfigure(tt,cursor="arrow") tkdestroy(tt) ## Terms of use tkmessageBox(title="TDStats", message="By using this program you agree to CRANIOFACIALidentification.com's 'Terms of Use'. \n\nNO WARRANTIES OR GUARANTEES OF ANY KIND are provided with TDStats. You use TDStats completely, and entirely, at your own risk. \n\nSee www.CRANIOFACIALidentification.com for further details. \n ------------------------------------------------------------------------\n\nInstructions: \n- The facial soft tissue depth (FSTD) file must be of the type '.cvs'.\n- The FSTD file must contain data from at least one study with N>10. \n- The FSTD file must be in format similar to C-Table:\na) Data should be organized by column under landmark headers.\nb) The first landmark should populate column 11 of the table.\nc) The first 10 columns should contain the following descriptive information, in sequence: Study, Year, Published, Study Code, Method, Sample, Population, Region, Age, Sex.\n\nTDStats can handle data for a single study or multiple studies. It will also analyze a few or many landmarks. For exemplar formatting, see CRANIOFACIALidentification.com.\n\nNote:\n- TDStats automatically removes right side bilateral landmarks to report midline and left side landmarks only.\n- TDStats will round the data to the nearest whole value to calculate pooled modes.\n- TDStats uses raw values for all other calculations, and will report correlations to two decimal places, and all other statistics to one decimal place.\n",icon="info",type="ok") ## Set working directory directory <- tclvalue(tkchooseDirectory(title="Select the WORKING DIRECTORY, i.e., where your FSTD .cvs file is located and where you want the new data files written to.")) setwd(directory) ## Load the raw FSTD data, pull out landmarks, and make numeric matrix tddirectory <- tclvalue(tkgetOpenFile(title="Open the .csv data file containing the facial soft tissue depths you wish to analyse")) td <- read.csv(tddirectory, header = TRUE, na.strings = "", skip = 2) ## Set the number of LANDMARKS to be analyzed dlg<-tktoplevel() tkwm.title(dlg,"TDStats") tkwm.geometry(dlg,"+30+30") tkgrid(tklabel(dlg,text="Enter the number of landmarks contained in the dataset: \n"), columnspan=4) mlmks <- tclVar() blmks <- tclVar() tlmks <- tclVar() entry.mlmks <- tkentry(dlg,width="5",textvariable=mlmks) tkgrid(tklabel(dlg,text="Midline landmarks (a) = 16, for the default C-Table:"),entry.mlmks) entry.blmks <- tkentry(dlg,width="5",textvariable=blmks) tkgrid(tklabel(dlg,text="Bilateral landmarks (b) = 11, for the default C-Table:"),entry.blmks) entry.tlmks <- tkentry(dlg,width="5",textvariable=tlmks) tkgrid(tklabel(dlg,text="Total landmarks (a+[b*2]) = 38, for the default C-Table:"),entry.tlmks) OnOK <- function () { NameVal1 <- tclvalue(mlmks) NameVal2 <- tclvalue(blmks) tkdestroy(dlg) } OK.but<-tkbutton(dlg,text="OK",command=OnOK) tkgrid(OK.but, columnspan=4) tkfocus(dlg) tkbind(dlg, "", function() {tkgrab.release(dlg)}) tkwait.window(dlg) mid_lmks <- as.numeric(tclvalue(mlmks)) bi_lmks <- as.numeric(tclvalue(blmks)) t_lmks <- as.numeric(tclvalue(tlmks)) Almks <- mid_lmks+bi_lmks # Warning for landmark counts if (mid_lmks+(bi_lmks*2)!=t_lmks) { tkmessageBox(title="Error", message="Landmarks have been counted incorrectly. TDStats will now close. \n",icon="warning") q() } tkmessageBox(title="TDStats", message="Data successfully loaded! \n\n TDStats will now perform a number of functions (to include boxplots, scatter plots, histograms, density plots, means, shorths, 75-shormaxes and more) in the style of: Stephan et al. (2013) Facial Soft Tissue Depth Statistics and Enhanced Point Estimators for Craniofacial Identification: The Debut of the Shorth and 75-Shormax, Journal of Forensic Sciences 58(6):1439-1457. \n\nThis will take a couple of minutes to complete. Sit back and relax.....do not panic if your R window temporarily disappears, freezes or states 'not responding'. TDStats will momentarily give your machine's memory and processor a workout. Flashing boxes and/or a busy cursor display are good signs. TDStats will let you know when data analysis is complete.\n\nOutput files will be placed into your nominated working directory. Graphs will be seperately outputted in raw formats to seperate directories and in a collated pdf format.\n\nPlease note that:\n1. Studies with a sample size N<10 will be automatically removed for study specific analyses, however, ALL entered studies and data will be used in the pooled analysis except where sample size for any landmark is <10 (these landmarks will be automatically dropped from the analysis). Details of the dropped studies can be found in the 'dropped_studies.txt' output. \n2. For sex comparisons at any landmark, too few sex specfic data will result in blank plots for that landmark.",icon="info") ## Get landmark labels cnames_raw <- colnames(td) mlmk_s <- 10+1 bilmk_s <- 10+mid_lmks+1 ## Number of studies & keep only those where n>=10 study_factor <- factor(td[,1]) study_n <- summary(study_factor) study_level <- levels(study_factor) Tstudy <- c() for (i in 1:length(study_n)) { if (study_n[[i]]>=10) { Tstudy <- c(Tstudy,study_level[i]) } } Rstudy <- c() for (i in 1:length(study_n)) { if (study_n[[i]]<=10) { Rstudy <- c(Rstudy,study_level[i]) } } l_Rstudy <- length(Rstudy) l_Tstudy <- length(Tstudy) df_Rstudy <- as.data.frame(Rstudy) if (l_Rstudy>0) {colnames(df_Rstudy) <- "Studies Removed from Boxplots"} wait<-tkmessageBox(title="TDStats", message=paste(l_Rstudy," studies were removed because they have a sample size <10.\n \n See 'dropped_studies.txt' for details."), icon="info",type="ok") # study code labels code <- c() for (i in 1:length(Tstudy)) { X <- paste("[",i,"]",sep="") code <- append(code,X) } study_code <- cbind("Studies for Boxplots"=Tstudy,Code=code) ## Break out studies & then combine td_trim <- c() for (i in 1:length(Tstudy)) { assign(paste("Data_",i,sep=""), subset(td, Study == Tstudy[i])) td_trim <- rbind(td_trim,get(paste("Data_",i,sep=""))) } td3 <- td_trim[,-which(apply(td_trim,2, function (x) all(is.na(x))))] temp <- apply(td3,2, function (x) length(which(!is.na(x)))<10) temp1 <- length(which(temp==TRUE)) if (temp1>0) { td3 <- td_trim[,-which(apply(td_trim,2, function (x) length(which(!is.na(x)))<10))] } else {td3 <- td3} tnames <- colnames(td3) l_td <- length(tnames) start_pos <- 11 start_1 <- 10 tbi_lmks <- length(grep("R_",tnames)) landmarks <- tnames[start_pos:(l_td-tbi_lmks)] ## For each data table collect required summary data for (i in 1:length(Tstudy)){ tdA <- get(paste("Data_",i,sep="")) tdA <- tdA[,11:(10+Almks)] tdB <- tdA[,-which(apply(tdA,2, function (x) all(is.na(x))))] temp <- apply(tdB,2, function (x) length(which(!is.na(x)))<10) temp1 <- length(which(temp==TRUE)) if (temp1>0) { tdC <- tdB[,-which(apply(tdB,2, function (x) length(which(!is.na(x)))<10))] } else {tdC <- tdB} l_td <- length(tdC) lmks <- as.list(tdC) col_ns <- names(lmks) tlmks <- c() ## list of data with NAs removed for (k in 1:l_td) { tlmks <- c(tlmks, list(sort(lmks[[k]]))) } names(tlmks) <- col_ns l_tlmks <- c() ## vector of lengths of tlmks vectors for (k in 1:length(tlmks)) { l_tlmks <- c(l_tlmks, length(tlmks[[k]])) } shls <- c() ## get vector of 1/2 no. of observations +1 for (k in 1:length(tlmks)) { shls <- c(shls, floor(length(tlmks[[k]])/2+1)) } shls2 <- c() ## get vector of 1/2 no. of observations for (k in 1:length(tlmks)) { shls2 <- c(shls2, floor(length(tlmks[[k]])/2)) } ## get vectors of distances for (k in 1:length(tlmks)) { vdist <- c() for (j in 1: l_tlmks[k]) { if (j < shls2[k]) { assign(paste("svdist",k,sep="_"), vdist <- c(vdist, abs(tlmks[[k]][j]-tlmks[[k]][j+shls[k]]))) } } } rm(vdist) vdist <- sapply(mixedsort(ls(pat="svdist_")), get, simplify = FALSE) names(vdist) <- col_ns ## Get Shortest half md <- sapply(vdist, function (a) which.min(a)) lowerb <- as.vector(md) upperb <- lowerb + shls ## Gest Shortest half vector from raw data shvs <- c() for (k in 1:length(tlmks)) { shvs <- c(shvs, list(tlmks[[k]][lowerb[k]:upperb[k]])) } names(shvs) <- col_ns ## Get Summary Data sumdata <- round(rbind(sapply(tlmks, length), sapply(tlmks, min), sapply(tlmks, max), sapply(tlmks, function(a) ((max(a)-min(a))/2)+min(a)),sapply(tlmks, mean), sapply(tlmks, sd), sapply(tlmks, median), sapply(tlmks, mad, const=1), sapply(tlmks, function (d) as.numeric(names(sort(-table(d)))[1])), sapply(tlmks, skewness), sapply(tlmks, kurtosis), shls, sapply(shvs, function (b) min(b)), sapply(shvs, function(c) max(c)), sapply(shvs, function(a) ((max(a)-min(a))/2)+min(a)),sapply(shvs, mean), sapply(shvs, sd), sapply(shvs, median), sapply(shvs, mad, const=1), sapply(shvs, function (d) as.numeric(names(sort(-table(d)))[1])), sapply(shvs, skewness), sapply(shvs, kurtosis)), digits=1) rownames(sumdata) <- c("n", "min", "max", "half_range", "mean", "sd", "median","MAD", "mode", "skewness", "kurtosis", "SH_n", "SH_min", "SH_max", "SH_half_range", "SH_mean (Shorth)", "SH_sd", "SH_median","SH_MAD", "SH_mode", "SH_skewness", "SH_kurtosis") ## Get 75 quantile for data between the SHORTH and MAX quant75 <- c() for (m in 1:length(tlmks)) { quant75 <- c(quant75, quantile(subset(tlmks[[m]], tlmks[[m]]>=sumdata[16,m]), 0.75)) } names(quant75) <- col_ns Shormax <-round(quant75, digits=1) sumdata_final<-rbind(sumdata,Shormax) assign(paste("SumData",i,sep=""),sumdata_final) write.csv(sumdata_final, file = paste(directory,"/BoxPlotSummary_Study",i,".csv", sep="")) ## save out data to home directory @ one decimal place } sink("study_codes.txt"); print(study_code); sink () print("saved study_codes.txt") sink("dropped_studies.txt"); print(df_Rstudy); sink () print("saved dropped_studies.txt") if (l_Rstudy==0) { td4 <- vector("list", 1) td4[[1]]<-SumData1 } else {td4<-sapply(mixedsort(ls(pat="SumData")), get, simplify = FALSE)} names(td4) <- Tstudy sink("study_summaryData.txt"); print(td4); sink() print("saved study_summaryData.txt") ## Generate Boxplots: # Create NA filled matrix for boxplot extras and add data print("starting study boxplot analysis...") td5 <- vector("list", length(Tstudy)) temp_m <- matrix(NA,23,length(landmarks)) colnames(temp_m) <- landmarks rownames(temp_m) <- c("n", "min", "max", "half_range", "mean", "sd", "median","MAD", "mode", "skewness", "kurtosis", "SH_n", "SH_min", "SH_max", "SH_half_range", "SH_mean (Shorth)", "SH_sd", "SH_median","SH_MAD", "SH_mode", "SH_skewness", "SH_kurtosis", "Shormax") for (i in 1:length(Tstudy)) td5[[i]]<-temp_m names(td5) <- Tstudy for (i in 1: length(Tstudy)) { x<-colnames(td4[[i]]) for (k in 1:length(x)) { td5[[Tstudy[i]]][,x[k]]<- td4[[Tstudy[i]]][,x[k]] } } ## Generate JPEG Boxplots: for (i in 1:length(landmarks)) { dev.new() jpeg(filename = paste("BoxPlot_byStudy_",landmarks[i],".jpg", sep=""), width = 23, height = 15, units = "cm", res = 600) par(mar = c(3, 3, 3, 2) + 0.1, lwd = "1.8", font.lab = "2") ymax <- max(sort(td3[,i+start_1]))+2 boxplot(td3[[landmarks[i]]]~factor(td3[[1]]), data = td3, names = FALSE, varwidth = FALSE, boxwex = 0.5, main = landmarks[i], ylim = c(0,ymax), yaxt = "n", cex.main = 2.5) axis(side = 2, las = 2, at = NULL, cex.axis = 1.5, lwd = 1.8) axis(side = 1, labels = code, at = 1:length(code), cex.axis = 1.5, las = 1, lwd = 1.8) for (j in 1:length(Tstudy)) { v_mean <- td5[[j]][,landmarks[i]][5] shmin <- td5[[j]][,landmarks[i]][13] shmax <- td5[[j]][,landmarks[i]][14] points(j,v_mean, pch=19) ## Mean segments(j+0.45, shmin, j+0.45, shmax, lwd=1.8, col="gray30") ## SH height segments(j+0.35, shmin, j+0.45, shmin, lwd=1.8, col="gray30") ## SH bottom tickmark segments(j+0.35, shmax, j+0.45, shmax, lwd=1.8, col="gray30") ## SH top tickmark } dev.off() graphics.off() } ## PDF of study boxplots dev.new() pdf(file="FSTD_Study_boxplots.pdf") par(mfrow=c(1,1)) for (i in 1:length(landmarks)) { ymax <- max(sort(td3[,i+start_1]))+2 boxplot(td3[[landmarks[i]]]~factor(td3[[1]]), data = td3, names = FALSE, varwidth = FALSE, boxwex = 0.5, main = landmarks[i], ylim = c(0,ymax), yaxt = "n", cex.main = 2.5) axis(side = 2, las = 2, at = NULL, cex.axis = 1.5, lwd = 1.8) axis(side = 1, labels = code, at = 1:length(code), cex.axis = 1.5, las = 1, lwd = 1.8) for (j in 1:length(Tstudy)) { v_mean <- td5[[j]][,landmarks[i]][5] shmin <- td5[[j]][,landmarks[i]][13] shmax <- td5[[j]][,landmarks[i]][14] points(j,v_mean, pch=19) ## Mean segments(j+0.45, shmin, j+0.45, shmax, lwd=1.8, col="gray30") ## SH height segments(j+0.35, shmin, j+0.45, shmin, lwd=1.8, col="gray30") ## SH bottom tickmark segments(j+0.35, shmax, j+0.45, shmax, lwd=1.8, col="gray30") ## SH top tickmark } dev.off() graphics.off() } if (l_Tstudy==1) {print(paste("saved summary boxplots for ",l_Tstudy," study",sep="")) } else {print(paste("saved summary boxplots for ",l_Tstudy," studies",sep=""))} ## Violing plots print("starting pooled analysis for violin plots...") landmarks_all <- colnames(td3) l_td <- length(landmarks_all) vp_bi_lmks <- length(grep("R_",tnames)) vp_end <- (l_td-vp_bi_lmks) landmarks <- landmarks_all[start_pos:vp_end] landmark_number <- length(landmarks) ## Split out landmarks, and combine into list for (i in start_pos:vp_end) { assign(paste("lmk_vp",i, sep="_"), td3[,i]) } vp_lmks <- sapply(mixedsort(ls(pat="lmk_vp")), get, simplify = FALSE) for (i in 1:length(start_pos:vp_end)) { if (length(which(is.na(vp_lmks[[i]])))==0) { assign(paste("tlmk_vp",i,sep="_"), vp_lmks[[i]]) } else { assign(paste("tlmk_vp",i, sep="_"), vp_lmks[[i]][-which(is.na(vp_lmks[[i]]))]) } } vp_tlmks <- sapply(mixedsort(ls(pat="tlmk_vp")), get, simplify = FALSE) names(vp_tlmks) <- landmarks ## PDF of violin plots dev.new() pdf(file="FSTD_Violin_plots.pdf") par(mfrow=c(2,6)) for (i in 1:landmark_number) { maxi <- max(vp_tlmks[[i]])+2 vioplot(vp_tlmks[[i]], drawRect = FALSE, names = FALSE, col="black", ylim = c(0,maxi)) axis(side = 1, labels = landmarks[i], at = 1, cex.axis=2) axis(side = 2, ylim = c(0,maxi)) } dev.off() graphics.off() ## JPEGs of Violin plots for (i in 1:landmark_number) { dev.new() par(mar=c(6,6,1,1)+0.1) jpeg(filename = paste("Vioplot_",landmarks[i],".jpg",sep=""), width = 8, height = 25, units = "cm", res = 300) maxi <- max(vp_tlmks[[i]])+2 vioplot(vp_tlmks[[i]], drawRect = FALSE, names = FALSE, col="black", ylim = c(0,maxi)) axis(side = 1, labels = landmarks[i], at = 1, cex.axis=2) axis(side = 2, ylim = c(0,maxi)) dev.off() graphics.off() } print(paste("saved violin plots for ",landmark_number," landmarks",sep="")) ## Correlation Matrix corrmat <- round(cor(td3[,start_pos:(l_td-tbi_lmks)],td3[,start_pos:(l_td-tbi_lmks)],use="pairwise.complete.obs",method="pearson"),digits=2) write.csv(corrmat, file = "correlation_matrix.csv") print(paste("saved correlation matrix for ",landmark_number," landmarks",sep="")) ## JPEGs of Density Distributions by Sex print("starting density plot analysis by sex...") for (i in 1:landmark_number) { assign(paste("sex_lmk",i,sep=""), na.omit(td3[,c(1,start_1+i)])) dev.new() jpeg(filename = paste("Sex_", landmarks[i],".jpg", sep=""), width = 10, height = 15, units = "cm", res = 300) X<-na.omit(td3[,c(10,start_1+i)]) Xlevels<-summary(factor(X[,1])) sm.density.compare(X[,2],X[,1], xlab="mm", col=c("black", "darkgray"), lty=c(1,1)) legend("topright", inset=.05, title="Sex",c("Females","Males"),cex=0.5, fill=c("black", "darkgray")) title(main=landmarks[i]) dev.off() graphics.off() } ## PDF of Density Distributions by Sex dev.new() pdf(file="FSTD_Sex_densities.pdf") par(mfrow=c(2,3)) for (i in 1:landmark_number) { x<-td3[,c(10,start_1+i)] X<-na.omit(x) Xlevels<-summary(factor(X[,1])) sm.density.compare(X[,2],X[,1], xlab="mm", col=c("black", "darkgray"), lty=c(1,1)) legend("topright", inset=.05, title="Sex",c("Females","Males"),cex=0.5, fill=c("black", "darkgray")) title(main=landmarks[i]) } dev.off() graphics.off() print(paste("saved ",landmark_number," sex density plots",sep="")) ## PDF of Scatterplots by Age print("starting scatterplot analysis by age...") corage1 <- td3[,c(9,start_pos:vp_end)] row.names(corage1) <- NULL corage2 <- data.frame(data.matrix(subset(corage1, !is.na(Age)))) corage3 <- round(cor(corage2[,1],corage2[,2:length(corage2)],use="pairwise.complete.obs",method="pearson"),digits=2) age_lmks <- names(corage2[2:length(corage2)]) dev.new() pdf(file="FSTD_Age_scatter.pdf") par(mfrow=c(2,3)) for (i in 2:length(age_lmks)) { X<-corage2[,c(1,1+i)] ymax <- max(X[,2]*1.4, na.rm=TRUE) reg1 <- lm(X[,2]~X[,1]) plot(X[,1], X[,2], xlab="Age (years)", xlim=c(0,100), ylab="mm", ylim=c(0,ymax)) title(main=age_lmks[i]) mtext(paste("r=",corage3[i],sep=""),adj=1,cex=0.7) abline(reg1, col="red") } dev.off() graphics.off() ## JPEGs of Scatterplots by Age for (i in 2:length(age_lmks)) { dev.new() jpeg(filename = paste("Age_", age_lmks[i],".jpg", sep=""), width = 10, height = 15, units = "cm", res = 300) X<-corage2[,c(1,1+i)] reg1 <- lm(X[,2]~X[,1]) plot(X[,1], X[,2], xlab="Age (years)", xlim=c(0,100), ylab="mm", ylim=c(0,max(X[,2]*1.4, na.rm=TRUE))) title(main=age_lmks[i]) mtext(paste("r=",corage3[i],sep=""),adj=1) abline(reg1, col="red") dev.off() graphics.off() } print(paste("saved ",landmark_number," age scatterplots",sep="")) #Histograms with adjoining boxplot, shortest half, shorth and 75-shormax ## Split out landmarks, and combine into list tnames <- colnames(td3) l_td <- length(tnames) start_pos <- 11 start_1 <- 10 tbi_lmks <- length(grep("R_",tnames)) landmarks <- tnames[start_pos:(l_td-tbi_lmks)] td5 <- data.frame(data.matrix(td3[,1:(l_td-tbi_lmks)])) row.names(td5) <- NULL temp <- apply(td5,2, function (x) length(which(!is.na(x)))<10) temp1 <- length(which(temp==TRUE)) if (temp1>0) { td6 <- td5[,-which(apply(td5,2, function (x) length(which(!is.na(x)))<10))] } else {td6 <- td5} for (i in start_pos:(l_td-tbi_lmks)) { assign(paste("all_lmk",i, sep="_"), td6[,i]) } all_lmks <- sapply(mixedsort(ls(pat="all_lmk_")), get, simplify = FALSE) for (i in 1:length(start_pos:(l_td-tbi_lmks))) { if (length(which(is.na(all_lmks[[i]])))==0) { assign(paste("all_tlmk",i, sep="_"), sort(all_lmks[[i]])) } else { assign(paste("all_tlmk",i, sep="_"), sort(all_lmks[[i]][-which(is.na(all_lmks[[i]]))])) } } all_tlmks <- sapply(mixedsort(ls(pat="all_tlmk_")), get, simplify = FALSE) l_tlmks <- c() ## vector of lengths of tlmks vectors for (k in 1:length(all_tlmks)) { l_tlmks <- c(l_tlmks, length(all_tlmks[[k]])) } shls <- c() ## get vector of 1/2 no. of observations +1 for (k in 1:length(all_tlmks)) { shls <- c(shls, floor(length(all_tlmks[[k]])/2+1)) } shls2 <- c() ## get vector of 1/2 no. of observations for (k in 1:length(all_tlmks)) { shls2 <- c(shls2, floor(length(all_tlmks[[k]])/2)) } ## get vectors of distances for (k in 1:length(all_tlmks)) { vdist <- c() for (j in 1: l_tlmks[k]) { if (j < shls2[k]) { assign(paste("all_svdist",k,sep="_"), vdist <- c(vdist, abs(all_tlmks[[k]][j]-all_tlmks[[k]][j+shls[k]]))) } } } rm(vdist) all_vdist <- sapply(mixedsort(ls(pat="all_svdist_")), get, simplify = FALSE) names(all_vdist) <- landmarks ## Get Shortest half md <- sapply(all_vdist, function (a) which.min(a)) lowerb <- as.vector(md) upperb <- lowerb + shls ## Gest Shortest half vector from raw data all_shvs <- c() for (k in 1:length(all_tlmks)) { all_shvs <- c(all_shvs, list(all_tlmks[[k]][lowerb[k]:upperb[k]])) } names(all_shvs) <- landmarks ## Round tlmks for mode calculations fun <- function(k) round(k, digits = 0) r_tlmks <- lapply(all_tlmks, fun) r_shvs <- lapply(all_shvs,fun) ## Get Summary Data sumdata <- round(rbind(sapply(all_tlmks, length), sapply(all_tlmks, min), sapply(all_tlmks, max), sapply(all_tlmks, function(a) ((max(a)-min(a))/2)+min(a)),sapply(all_tlmks, mean), sapply(all_tlmks, sd), sapply(all_tlmks, median), sapply(all_tlmks, mad, const=1), sapply(r_tlmks, function (d) as.numeric(names(sort(-table(d)))[1])), sapply(all_tlmks, skewness), sapply(all_tlmks, kurtosis), shls, sapply(all_shvs, function (b) min(b)), sapply(all_shvs, function(c) max(c)), sapply(all_shvs, function(a) ((max(a)-min(a))/2)+min(a)),sapply(all_shvs, mean), sapply(all_shvs, sd), sapply(all_shvs, median), sapply(all_shvs, mad, const=1), sapply(r_shvs, function (d) as.numeric(names(sort(-table(d)))[1])), sapply(all_shvs, skewness), sapply(all_shvs, kurtosis)), digits=1) rownames(sumdata) <- c("n", "min", "max", "half_range", "mean", "sd", "median","MAD", "mode", "skewness", "kurtosis", "SH_n", "SH_min", "SH_max", "SH_half_range", "Shorth (SH_mean)", "SH_sd", "SH_median","SH_MAD", "SH_mode", "SH_skewness", "SH_kurtosis") colnames(sumdata) <- landmarks ## Difference between mean and midrange of SH distance <- sumdata[5,]-sumdata[15,] ## Get 75 quantile for data between the SHORTH and MAX quant75 <- c() for (i in 1:length(all_tlmks)) { quant75 <- c(quant75, quantile(subset(all_tlmks[[i]], all_tlmks[[i]]>=sumdata[16,i]), 0.75)) } names(quant75) <- landmarks Shormax <-round(quant75, digits=1) sumdata_final<-rbind(sumdata,Shormax) colnames(sumdata_final)<-landmarks write.csv(sumdata_final, file = paste(directory,"/pooled_summaryStats.csv", sep="")) ## save out data to home directory @ one decimal place print("saved pooled summary statistics") ## Get Boxplot stats (wisker data etc.) for (i in 1:length(landmarks)) { assign(paste("bpstats",i,sep="_"), boxplot.stats(all_tlmks[[i]], coef=1.5)) } ## PDF of Final Histograms print("starting pooled combo histobox plot analysis...") dev.new() pdf(file="FSTD_histograms.pdf") par(mfcol=c(4,2)) for (i in 1:length(landmarks)) { xmax <- max(sort(td3[,i+start_1]))+2 average <- sumdata_final[5,i] shorth <- sumdata_final[16,i] shormax <- sumdata_final[23,i] shmin <- sumdata_final[13,i] shmax <- sumdata_final[14,i] boxplot(td3[[i+start_1]], horizontal=TRUE, axes=FALSE, ylim=c(0,xmax)) segments(shmin, 0.55, shmax, 0.55, lwd=1.8, col="gray30") ## SH height segments(shmin, 0.65, shmin, 0.55, lwd=1.8, col="gray30") ## SH bottom tickmark segments(shmax, 0.65, shmax, 0.55, lwd=1.8, col="gray30") ## SH top tickmark mtext(landmarks[i], side=3, line=-0.1) points(shorth,0.55, pch=19) ## Shorth points(shormax,0.55, pch=19, col="darkgrey") ## Shormax points(average,1,pch=4) ## mean hist(td3[[i+start_1]],col="darkgrey", main=NULL, xlab="mm", xlim=c(0,xmax)) } dev.off() graphics.off() ## JPEGs of Final Histograms for (i in 1:length(landmarks)) { dev.new() jpeg(filename = paste("Final_Histogram_",landmarks[i],".jpg", sep=""), width = 20, height = 15, units = "cm", res = 600) xmax <- max(sort(td3[,i+start_1]))+2 average <- sumdata_final[5,i] shorth <- sumdata_final[16,i] shormax <- sumdata_final[23,i] shmin <- sumdata_final[13,i] shmax <- sumdata_final[14,i] par(fig=c(0,0.8,0,0.8), new=TRUE) hist(td3[[i+start_1]],col="darkgrey", main=NULL, xlab="mm", xlim=c(0,xmax)) par(fig=c(0,0.8,0.51,0.9), new=TRUE) boxplot(td3[[i+start_1]], horizontal=TRUE, axes=FALSE, ylim=c(0,xmax)) segments(shmin, 0.55, shmax, 0.55, lwd=1.8, col="gray30") ## SH height segments(shmin, 0.65, shmin, 0.55, lwd=1.8, col="gray30") ## SH bottom tickmark segments(shmax, 0.65, shmax, 0.55, lwd=1.8, col="gray30") ## SH top tickmark mtext(landmarks[i], side=3, line=-0.1) points(shorth,0.55, pch=19) ## Shorth points(shormax,0.55, pch=19, col="darkgrey") ## Shormax points(average,1,pch=4) ## mean dev.off() graphics.off() } } print(paste("saved ",landmark_number," combo histobox plots of pooled data",sep="")) ## Move files to directories shell(cmd=paste("mkdir ",directory,"/FSTD_study_boxplots/",sep=""), translate = TRUE) shell(cmd=paste("mkdir ",directory,"/FSTD_violinplots/",sep=""), translate = TRUE) shell(cmd=paste("mkdir ",directory,"/FSTD_sex/",sep=""), translate = TRUE) shell(cmd=paste("mkdir ",directory,"/FSTD_age/",sep=""), translate = TRUE) shell(cmd=paste("mkdir ",directory,"/FSTD_histograms/",sep=""), translate = TRUE) shell(cmd=paste("move ",directory,"/Boxplot*.* ",directory,"/FSTD_study_boxplots/",sep=""), translate = TRUE) shell(cmd=paste("move ",directory,"/Vioplot*.* ",directory,"/FSTD_violinplots/",sep=""), translate = TRUE) shell(cmd=paste("move ",directory,"/Sex*.* ",directory,"/FSTD_sex/",sep=""), translate = TRUE) shell(cmd=paste("move ",directory,"/Age*.* ",directory,"/FSTD_age/",sep=""), translate = TRUE) shell(cmd=paste("move ",directory,"/Final_Histogram*.* ",directory,"/FSTD_histograms/",sep=""), translate = TRUE) rm(list = ls()) print("file clean-up and organization complete") ## Update dlg <- tktoplevel() tkwm.title(dlg,"TDStats") labelText <- tclVar("TDStats has generated a comprehensive overview of the data that you loaded. \n\n Your output files have been successfully saved to your chosen directory. \n\nEnjoy your results and keep an eye out for new updates to TDStats @ CRANIOFACIALidentification.com! \n") label1 <- tklabel(dlg,text=tclvalue(labelText)) tkconfigure(label1,textvariable=labelText) tkgrid(label1) Cont.but <- tkbutton(dlg,text="Close TDStats",command=function()tkdestroy(dlg)) tkgrid(Cont.but) tkfocus(dlg) tkbind(dlg, "", function() {tkgrab.release(dlg)}) tkwait.window(dlg) rm(list = ls()) print("TDStats analysis complete") ## End of TDStats