#############################################################
#                       TDStats v2014.1                     #
#                 Written by CN Stephan, 2012               #
#############################################################

## Instructions:
## Facial soft tissue depth (FSTD) file must be of the type ".cvs"
## The FSTD file must contain data of at least one study with n>10.
## FSTD file must be in format similar to C-Table:
## -Data should be organized by column under landmark headers.
## -The first landmark should populate column 11 of the table.
## -The first 10 columns should contain the following descriptive information, in sequence:
## Study, Year, Published, Study Code, Method, Sample, Population, Region, Age, Sex.
## For exemplar  formatting, see the C-Table @ CRANIOFACIALidentification.com

## Import libraries
required.packages <- c("tcltk", "gtools", "Hmisc", "PerformanceAnalytics", "sm", "vioplot", "xts", "zoo")
new.packages <- required.packages[!(required.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
library(tcltk) ## for GUI
library(Hmisc) ## for BOXPLOTS
library(gtools) ## for additional BOXPLOT DATA / VIOLIN PLOT
library(PerformanceAnalytics) ## for additional BOXPLOT DATA
library(zoo) ## for PerformanceAnalytics
library(xts) ## for PerformanceAnalytics
library(vioplot) ## for VIOLIN PLOT
library(sm) ## for SEX, AGE and FINAL BOX plots

## Display Logo Window
R.base.dir <- system.file()
setwd(paste(R.base.dir,"/../../doc/html",sep=""))
image <- as.numeric(file.access(paste(R.base.dir,"/../../doc/html/TDStats_logo.gif",sep=""), mode=0))
if (image==-1) {
	tkmessageBox(title="TDStats", message="You are missing the TDStats logo. \n\nTo use TDStats you must save the TDStats logo to the following path: \n\nC:/Program Files/R/R-[version]/doc/html. \n\n See CRANIOFACIALidentification.com for more information.",icon="warning")
	q()
} 
if (image==0) {
	tt <- tktoplevel()
	tkwm.title(tt, "TDStats")
	tkwm.geometry(tt,"600x280+30+30")
	image1<-tclVar()
	tcl("image","create","photo",image1,file="TDStats_logo.gif")
	imgAsLabel<-tklabel(tt,image=image1,bg="white")
	tkpack(imgAsLabel)
	tkconfigure(tt,cursor="watch")
	Sys.sleep (10)
	tkconfigure(tt,cursor="arrow")
	tkdestroy(tt)

	## Terms of use
	tkmessageBox(title="TDStats", message="By using this program you agree to CRANIOFACIALidentification.com's 'Terms of Use'. \n\nNO WARRANTIES OR GUARANTEES OF ANY KIND are provided with TDStats. You use TDStats completely, and entirely, at your own risk. \n\nSee www.CRANIOFACIALidentification.com for further details. \n ------------------------------------------------------------------------\n\nInstructions: \n- The facial soft tissue depth (FSTD) file must be of the type '.cvs'.\n- The FSTD file must contain data from at least one study with N>10. \n- The FSTD file must be in format similar to C-Table:\na) Data should be organized by column under landmark headers.\nb) The first landmark should populate column 11 of the table.\nc) The first 10 columns should contain the following descriptive information, in sequence: Study, Year, Published, Study Code, Method, Sample, Population, Region, Age, Sex.\n\nTDStats can handle data for a single study or multiple studies. It will also analyze a few or many landmarks. For exemplar  formatting, see CRANIOFACIALidentification.com.\n\nNote:\n- TDStats automatically removes right side bilateral landmarks to report midline and left side landmarks only.\n- TDStats will round the data to the nearest whole value to calculate pooled modes.\n- TDStats uses raw values for all other calculations, and will report correlations to two decimal places, and all other statistics to one decimal place.\n",icon="info",type="ok")

	## Set working directory
	directory <- tclvalue(tkchooseDirectory(title="Select the WORKING DIRECTORY, i.e., where your FSTD .cvs file is located and where you want the new data files written to."))
	setwd(directory)

	## Load the raw FSTD data, pull out landmarks, and make numeric matrix
	tddirectory <- tclvalue(tkgetOpenFile(title="Open the .csv data file containing the facial soft tissue depths you wish to analyse"))
	td <- read.csv(tddirectory, header = TRUE, na.strings = "", skip = 2)
	
	## Set the number of LANDMARKS to be analyzed
	dlg<-tktoplevel()
	tkwm.title(dlg,"TDStats")
	tkwm.geometry(dlg,"+30+30")
	tkgrid(tklabel(dlg,text="Enter the number of landmarks contained in the dataset: \n"), columnspan=4)
	mlmks <- tclVar()
	blmks <- tclVar()
	tlmks <- tclVar()
	entry.mlmks <- tkentry(dlg,width="5",textvariable=mlmks)
	tkgrid(tklabel(dlg,text="Midline landmarks (a) = 16, for the default C-Table:"),entry.mlmks)
	entry.blmks <- tkentry(dlg,width="5",textvariable=blmks)
	tkgrid(tklabel(dlg,text="Bilateral landmarks (b) = 11, for the default C-Table:"),entry.blmks)
	entry.tlmks <- tkentry(dlg,width="5",textvariable=tlmks)
	tkgrid(tklabel(dlg,text="Total landmarks (a+[b*2]) = 38, for the default C-Table:"),entry.tlmks)
	OnOK <- function () {
		NameVal1 <- tclvalue(mlmks)
		NameVal2 <- tclvalue(blmks)
		tkdestroy(dlg)
	}
	OK.but<-tkbutton(dlg,text="OK",command=OnOK)
	tkgrid(OK.but, columnspan=4)
	tkfocus(dlg)
	tkbind(dlg, "<Destroy>", function() {tkgrab.release(dlg)})
	tkwait.window(dlg)
	mid_lmks <- as.numeric(tclvalue(mlmks))
	bi_lmks <- as.numeric(tclvalue(blmks))
	t_lmks <- as.numeric(tclvalue(tlmks))
	Almks <- mid_lmks+bi_lmks

	# Warning for landmark counts
	if (mid_lmks+(bi_lmks*2)!=t_lmks) {
		tkmessageBox(title="Error", message="Landmarks have been counted incorrectly. TDStats will now close. \n",icon="warning")
		q()
	}
	tkmessageBox(title="TDStats", message="Data successfully loaded! \n\n TDStats will now perform a number of functions (to include boxplots, scatter plots, histograms, density plots, means,  shorths, 75-shormaxes and more) in the style of: Stephan et al. (2013) Facial Soft Tissue Depth Statistics and Enhanced Point Estimators for Craniofacial Identification: The Debut of the Shorth and 75-Shormax, Journal of Forensic Sciences 58(6):1439-1457. \n\nThis will take a couple of minutes to complete. Sit back and relax.....do not panic if your R window temporarily disappears, freezes or states 'not responding'. TDStats will momentarily give your machine's memory and processor a workout. Flashing boxes and/or a busy cursor display are good signs. TDStats will let you know when data analysis is complete.\n\nOutput files will be placed into your nominated working directory. Graphs will be seperately outputted in raw formats to seperate directories and in a collated pdf format.\n\nPlease note that:\n1. Studies with a sample size N<10 will be automatically removed for study specific analyses, however, ALL entered studies and data will be used in the pooled analysis except where sample size for any landmark is <10 (these landmarks will be automatically dropped from the analysis). Details of the dropped studies can be found in the 'dropped_studies.txt' output. \n2. For sex comparisons at any landmark, too few sex specfic data will result in blank plots for that landmark.",icon="info")

	## Get landmark labels
	cnames_raw <- colnames(td)
	mlmk_s <- 10+1
	bilmk_s <- 10+mid_lmks+1

	## Number of studies & keep only those where n>=10
	study_factor <- factor(td[,1])
	study_n <- summary(study_factor)
	study_level <- levels(study_factor)

	Tstudy <- c()
	for (i in 1:length(study_n)) {
		if (study_n[[i]]>=10) {
			Tstudy <- c(Tstudy,study_level[i])
		}
	}
	Rstudy <- c()
	for (i in 1:length(study_n)) {
		if (study_n[[i]]<=10) {
			Rstudy <- c(Rstudy,study_level[i])
		}
	}
	l_Rstudy <- length(Rstudy)
	l_Tstudy <- length(Tstudy)
	df_Rstudy <- as.data.frame(Rstudy)
	if (l_Rstudy>0) {colnames(df_Rstudy) <- "Studies Removed from Boxplots"}
	wait<-tkmessageBox(title="TDStats", message=paste(l_Rstudy," studies were removed because they have a sample size <10.\n \n See 'dropped_studies.txt' for details."), icon="info",type="ok")

	# study code labels
	code <- c()
	for (i in 1:length(Tstudy)) {
		X <- paste("[",i,"]",sep="")
		code <- append(code,X)
	}
	study_code <- cbind("Studies for Boxplots"=Tstudy,Code=code)
	
	## Break out studies & then combine
	td_trim <- c()
	for (i in 1:length(Tstudy)) {
		assign(paste("Data_",i,sep=""), subset(td, Study == Tstudy[i]))
		td_trim <- rbind(td_trim,get(paste("Data_",i,sep="")))
	}
	td3 <- td_trim[,-which(apply(td_trim,2, function (x) all(is.na(x))))]
	temp <- apply(td3,2, function (x) length(which(!is.na(x)))<10)
	temp1 <- length(which(temp==TRUE))
	if (temp1>0) {
		td3 <- td_trim[,-which(apply(td_trim,2, function (x) length(which(!is.na(x)))<10))]
	} else {td3 <- td3}
	tnames <- colnames(td3)
	l_td <- length(tnames)
	start_pos <- 11
	start_1 <- 10
	tbi_lmks <- length(grep("R_",tnames))
	landmarks <- tnames[start_pos:(l_td-tbi_lmks)]	
	
	## For each data table collect required summary data
	for (i in 1:length(Tstudy)){
		tdA <- get(paste("Data_",i,sep=""))
		tdA <- tdA[,11:(10+Almks)]
		tdB <- tdA[,-which(apply(tdA,2, function (x) all(is.na(x))))]
		temp <- apply(tdB,2, function (x) length(which(!is.na(x)))<10)
		temp1 <- length(which(temp==TRUE))
		if (temp1>0) {
			tdC <- tdB[,-which(apply(tdB,2, function (x) length(which(!is.na(x)))<10))]
		} else {tdC <- tdB}
		l_td <- length(tdC)
		lmks <- as.list(tdC)
		col_ns <- names(lmks)

		tlmks <- c() ## list of data with NAs removed
		for (k in 1:l_td) {
			tlmks <- c(tlmks, list(sort(lmks[[k]])))
		}
		names(tlmks) <- col_ns

		l_tlmks <- c() ## vector of lengths of tlmks vectors 
		for (k in 1:length(tlmks)) {
			l_tlmks <- c(l_tlmks, length(tlmks[[k]]))
		}

		shls <- c() ## get vector of 1/2 no. of observations +1
		for (k in 1:length(tlmks)) {
			shls <- c(shls, floor(length(tlmks[[k]])/2+1))
		}

		shls2 <- c() ## get vector of 1/2 no. of observations
		for (k in 1:length(tlmks)) {
			shls2 <- c(shls2, floor(length(tlmks[[k]])/2))
		}

		## get vectors of distances
		for (k in 1:length(tlmks)) {
			vdist <- c()
			for (j in 1: l_tlmks[k]) {
				if (j < shls2[k]) {
					assign(paste("svdist",k,sep="_"), vdist <- c(vdist, abs(tlmks[[k]][j]-tlmks[[k]][j+shls[k]])))
				}
			}
		}
		rm(vdist)
		vdist <- sapply(mixedsort(ls(pat="svdist_")), get, simplify = FALSE)
		names(vdist) <- col_ns

		## Get Shortest half
		md <- sapply(vdist, function (a) which.min(a))
		lowerb <- as.vector(md)
		upperb <- lowerb + shls

		## Gest Shortest half vector from raw data
		shvs <- c()
		for (k in 1:length(tlmks)) {
			shvs <- c(shvs, list(tlmks[[k]][lowerb[k]:upperb[k]]))
		}
		names(shvs) <- col_ns
		
		## Get Summary Data
		sumdata <- round(rbind(sapply(tlmks, length), sapply(tlmks, min), sapply(tlmks, max), sapply(tlmks, function(a) ((max(a)-min(a))/2)+min(a)),sapply(tlmks, mean), sapply(tlmks, sd), sapply(tlmks, median), sapply(tlmks, mad, const=1), sapply(tlmks, function (d) as.numeric(names(sort(-table(d)))[1])), sapply(tlmks, skewness), sapply(tlmks, kurtosis), shls, sapply(shvs, function (b) min(b)), sapply(shvs, function(c) max(c)), sapply(shvs, function(a) ((max(a)-min(a))/2)+min(a)),sapply(shvs, mean), sapply(shvs, sd), sapply(shvs, median), sapply(shvs, mad, const=1), sapply(shvs, function (d) as.numeric(names(sort(-table(d)))[1])), sapply(shvs, skewness), sapply(shvs, kurtosis)), digits=1)
		rownames(sumdata) <- c("n", "min", "max", "half_range", "mean", "sd", "median","MAD", "mode", "skewness", "kurtosis", "SH_n", "SH_min", "SH_max", "SH_half_range", "SH_mean (Shorth)", "SH_sd", "SH_median","SH_MAD", "SH_mode", "SH_skewness", "SH_kurtosis")
				
		## Get 75 quantile for data between the SHORTH and MAX
		quant75 <- c()
		for (m in 1:length(tlmks)) {
			quant75 <- c(quant75, quantile(subset(tlmks[[m]], tlmks[[m]]>=sumdata[16,m]), 0.75))
		}
		names(quant75) <- col_ns
		Shormax <-round(quant75, digits=1)
		sumdata_final<-rbind(sumdata,Shormax)
		assign(paste("SumData",i,sep=""),sumdata_final)
		write.csv(sumdata_final, file = paste(directory,"/BoxPlotSummary_Study",i,".csv", sep="")) ## save out data to home directory @ one decimal place
	}
	sink("study_codes.txt"); print(study_code); sink ()
	print("saved study_codes.txt")
	sink("dropped_studies.txt"); print(df_Rstudy); sink ()
	print("saved dropped_studies.txt")
	if (l_Rstudy==0) {
		td4 <- vector("list", 1)
		td4[[1]]<-SumData1
	} else {td4<-sapply(mixedsort(ls(pat="SumData")), get, simplify = FALSE)}
	names(td4) <- Tstudy
	sink("study_summaryData.txt"); print(td4); sink()
	print("saved study_summaryData.txt")

	## Generate Boxplots: 
	# Create NA filled matrix for boxplot extras and add data
	print("starting study boxplot analysis...")
	td5 <- vector("list", length(Tstudy))
	temp_m <- matrix(NA,23,length(landmarks))
	colnames(temp_m) <- landmarks
	rownames(temp_m) <- c("n", "min", "max", "half_range", "mean", "sd", "median","MAD", "mode", "skewness", "kurtosis", "SH_n", "SH_min", "SH_max", "SH_half_range", "SH_mean (Shorth)", "SH_sd", "SH_median","SH_MAD", "SH_mode", "SH_skewness", "SH_kurtosis", "Shormax")
	for (i in 1:length(Tstudy)) td5[[i]]<-temp_m
	names(td5) <- Tstudy
	for (i in 1: length(Tstudy)) {
		x<-colnames(td4[[i]])
		for (k in 1:length(x)) {
			td5[[Tstudy[i]]][,x[k]]<- td4[[Tstudy[i]]][,x[k]]
		}
	}
	
	## Generate JPEG Boxplots: 
	for (i in 1:length(landmarks)) {
		dev.new()
		jpeg(filename = paste("BoxPlot_byStudy_",landmarks[i],".jpg", sep=""), width = 23, height = 15, units = "cm", res = 600) 
		par(mar = c(3, 3, 3, 2) + 0.1, lwd = "1.8", font.lab = "2")
		ymax <- max(sort(td3[,i+start_1]))+2
		boxplot(td3[[landmarks[i]]]~factor(td3[[1]]), data = td3, names = FALSE, varwidth = FALSE, boxwex = 0.5, main = landmarks[i], ylim = c(0,ymax), yaxt = "n", cex.main = 2.5)
		axis(side = 2, las = 2, at = NULL, cex.axis = 1.5, lwd = 1.8)
		axis(side = 1, labels = code, at = 1:length(code), cex.axis = 1.5, las = 1, lwd = 1.8)
	
		for (j in 1:length(Tstudy)) {
			v_mean <- td5[[j]][,landmarks[i]][5]
			shmin <- td5[[j]][,landmarks[i]][13]
			shmax <- td5[[j]][,landmarks[i]][14]
			points(j,v_mean, pch=19) ## Mean
			segments(j+0.45, shmin, j+0.45, shmax, lwd=1.8, col="gray30") ## SH height
			segments(j+0.35, shmin, j+0.45, shmin, lwd=1.8, col="gray30") ## SH bottom tickmark
			segments(j+0.35, shmax, j+0.45, shmax, lwd=1.8, col="gray30") ## SH top tickmark
		}
		dev.off()
		graphics.off()
	}
	
	## PDF of study boxplots
	dev.new()
	pdf(file="FSTD_Study_boxplots.pdf") 
	par(mfrow=c(1,1))
	for (i in 1:length(landmarks)) {
		ymax <- max(sort(td3[,i+start_1]))+2
		boxplot(td3[[landmarks[i]]]~factor(td3[[1]]), data = td3, names = FALSE, varwidth = FALSE, boxwex = 0.5, main = landmarks[i], ylim = c(0,ymax), yaxt = "n", cex.main = 2.5)
		axis(side = 2, las = 2, at = NULL, cex.axis = 1.5, lwd = 1.8)
		axis(side = 1, labels = code, at = 1:length(code), cex.axis = 1.5, las = 1, lwd = 1.8)
		for (j in 1:length(Tstudy)) {
			v_mean <- td5[[j]][,landmarks[i]][5]
			shmin <- td5[[j]][,landmarks[i]][13]
			shmax <- td5[[j]][,landmarks[i]][14]
			points(j,v_mean, pch=19) ## Mean
			segments(j+0.45, shmin, j+0.45, shmax, lwd=1.8, col="gray30") ## SH height
			segments(j+0.35, shmin, j+0.45, shmin, lwd=1.8, col="gray30") ## SH bottom tickmark
			segments(j+0.35, shmax, j+0.45, shmax, lwd=1.8, col="gray30") ## SH top tickmark
		}
		dev.off()
		graphics.off()
	}
	
	if (l_Tstudy==1) {print(paste("saved summary boxplots for ",l_Tstudy," study",sep=""))
	} else {print(paste("saved summary boxplots for ",l_Tstudy," studies",sep=""))}

	## Violing plots
	print("starting pooled analysis for violin plots...")
	landmarks_all <- colnames(td3)
	l_td <- length(landmarks_all)
	vp_bi_lmks <- length(grep("R_",tnames))
	vp_end <- (l_td-vp_bi_lmks)
	landmarks <- landmarks_all[start_pos:vp_end]	
	landmark_number <- length(landmarks)

	## Split out landmarks, and combine into list
	for (i in start_pos:vp_end) {
		assign(paste("lmk_vp",i, sep="_"), td3[,i])
	}
	vp_lmks <- sapply(mixedsort(ls(pat="lmk_vp")), get, simplify = FALSE)
	for (i in 1:length(start_pos:vp_end)) {
		if (length(which(is.na(vp_lmks[[i]])))==0) {
			assign(paste("tlmk_vp",i,sep="_"), vp_lmks[[i]])
			} else {
		assign(paste("tlmk_vp",i, sep="_"), vp_lmks[[i]][-which(is.na(vp_lmks[[i]]))])
		}
	}
	vp_tlmks <- sapply(mixedsort(ls(pat="tlmk_vp")), get, simplify = FALSE)
	names(vp_tlmks) <- landmarks

	## PDF of violin plots
	dev.new()
	pdf(file="FSTD_Violin_plots.pdf") 
	par(mfrow=c(2,6))
	for (i in 1:landmark_number) {
		maxi <- max(vp_tlmks[[i]])+2
		vioplot(vp_tlmks[[i]], drawRect = FALSE, names = FALSE, col="black", ylim = c(0,maxi))
		axis(side = 1, labels = landmarks[i], at = 1, cex.axis=2)
		axis(side = 2, ylim = c(0,maxi))
	}
	dev.off()
	graphics.off()

	## JPEGs of Violin plots
	for (i in 1:landmark_number) {
		dev.new()
		par(mar=c(6,6,1,1)+0.1)
		jpeg(filename = paste("Vioplot_",landmarks[i],".jpg",sep=""), width = 8, height = 25, units = "cm", res = 300) 
		maxi <- max(vp_tlmks[[i]])+2
		vioplot(vp_tlmks[[i]], drawRect = FALSE, names = FALSE, col="black", ylim = c(0,maxi))
		axis(side = 1, labels = landmarks[i], at = 1, cex.axis=2)
		axis(side = 2, ylim = c(0,maxi))
		dev.off()
		graphics.off()
	}
	print(paste("saved violin plots for ",landmark_number," landmarks",sep=""))
	
	## Correlation Matrix
	corrmat <- round(cor(td3[,start_pos:(l_td-tbi_lmks)],td3[,start_pos:(l_td-tbi_lmks)],use="pairwise.complete.obs",method="pearson"),digits=2)
	write.csv(corrmat, file = "correlation_matrix.csv")
	print(paste("saved correlation matrix for ",landmark_number," landmarks",sep=""))

	## JPEGs of Density Distributions by Sex
	print("starting density plot analysis by sex...")
	for (i in 1:landmark_number) {
		assign(paste("sex_lmk",i,sep=""), na.omit(td3[,c(1,start_1+i)]))
		dev.new()
		jpeg(filename = paste("Sex_", landmarks[i],".jpg", sep=""), width = 10, height = 15, units = "cm", res = 300)
		X<-na.omit(td3[,c(10,start_1+i)])
		Xlevels<-summary(factor(X[,1]))
		sm.density.compare(X[,2],X[,1], xlab="mm", col=c("black", "darkgray"), lty=c(1,1))
		legend("topright", inset=.05, title="Sex",c("Females","Males"),cex=0.5, fill=c("black", "darkgray"))
		title(main=landmarks[i])
		dev.off()
		graphics.off()
	}

	## PDF of Density Distributions by Sex
	dev.new()
	pdf(file="FSTD_Sex_densities.pdf") 
	par(mfrow=c(2,3))
	for (i in 1:landmark_number) {
		x<-td3[,c(10,start_1+i)]
		X<-na.omit(x)
		Xlevels<-summary(factor(X[,1]))
		sm.density.compare(X[,2],X[,1], xlab="mm", col=c("black", "darkgray"), lty=c(1,1))
		legend("topright", inset=.05, title="Sex",c("Females","Males"),cex=0.5, fill=c("black", "darkgray"))
		title(main=landmarks[i])
	}
	dev.off()
	graphics.off()
	print(paste("saved ",landmark_number," sex density plots",sep=""))

	## PDF of Scatterplots by Age
	print("starting scatterplot analysis by age...")
	corage1 <- td3[,c(9,start_pos:vp_end)]
	row.names(corage1) <- NULL
	corage2 <- data.frame(data.matrix(subset(corage1, !is.na(Age))))
	corage3 <- round(cor(corage2[,1],corage2[,2:length(corage2)],use="pairwise.complete.obs",method="pearson"),digits=2)
	age_lmks <- names(corage2[2:length(corage2)])
	dev.new()
	pdf(file="FSTD_Age_scatter.pdf") 
	par(mfrow=c(2,3))
	for (i in 2:length(age_lmks)) {
		X<-corage2[,c(1,1+i)]
		ymax <- max(X[,2]*1.4, na.rm=TRUE)
		reg1 <- lm(X[,2]~X[,1])
		plot(X[,1], X[,2], xlab="Age (years)", xlim=c(0,100), ylab="mm", ylim=c(0,ymax))
		title(main=age_lmks[i])
		mtext(paste("r=",corage3[i],sep=""),adj=1,cex=0.7)
		abline(reg1, col="red")
	}
	dev.off()
	graphics.off()

	## JPEGs of Scatterplots by Age
	for (i in 2:length(age_lmks)) {
		dev.new()
		jpeg(filename = paste("Age_", age_lmks[i],".jpg", sep=""), width = 10, height = 15, units = "cm", res = 300)
		X<-corage2[,c(1,1+i)]
		reg1 <- lm(X[,2]~X[,1])
		plot(X[,1], X[,2], xlab="Age (years)", xlim=c(0,100), ylab="mm", ylim=c(0,max(X[,2]*1.4, na.rm=TRUE)))
		title(main=age_lmks[i])
		mtext(paste("r=",corage3[i],sep=""),adj=1)
		abline(reg1, col="red")
		dev.off()
		graphics.off()
	}
	print(paste("saved ",landmark_number," age scatterplots",sep=""))

	#Histograms with adjoining boxplot, shortest half, shorth and 75-shormax
	## Split out landmarks, and combine into list
	tnames <- colnames(td3)
	l_td <- length(tnames)
	start_pos <- 11
	start_1 <- 10
	tbi_lmks <- length(grep("R_",tnames))
	landmarks <- tnames[start_pos:(l_td-tbi_lmks)]	
	td5 <- data.frame(data.matrix(td3[,1:(l_td-tbi_lmks)]))
	row.names(td5) <- NULL
	temp <- apply(td5,2, function (x) length(which(!is.na(x)))<10)
	temp1 <- length(which(temp==TRUE))
		if (temp1>0) {
			td6 <- td5[,-which(apply(td5,2, function (x) length(which(!is.na(x)))<10))]
		} else {td6 <- td5}
	for (i in start_pos:(l_td-tbi_lmks)) {
		assign(paste("all_lmk",i, sep="_"), td6[,i])
	}
	all_lmks <- sapply(mixedsort(ls(pat="all_lmk_")), get, simplify = FALSE)
	for (i in 1:length(start_pos:(l_td-tbi_lmks))) {
		if (length(which(is.na(all_lmks[[i]])))==0) {
		assign(paste("all_tlmk",i, sep="_"), sort(all_lmks[[i]])) 
		} else {
		assign(paste("all_tlmk",i, sep="_"), sort(all_lmks[[i]][-which(is.na(all_lmks[[i]]))]))
		}
	}
	all_tlmks <- sapply(mixedsort(ls(pat="all_tlmk_")), get, simplify = FALSE)

	l_tlmks <- c() ## vector of lengths of tlmks vectors 
	for (k in 1:length(all_tlmks)) {
		l_tlmks <- c(l_tlmks, length(all_tlmks[[k]]))
	}

	shls <- c() ## get vector of 1/2 no. of observations +1
	for (k in 1:length(all_tlmks)) {
		shls <- c(shls, floor(length(all_tlmks[[k]])/2+1))
	}

	shls2 <- c() ## get vector of 1/2 no. of observations
	for (k in 1:length(all_tlmks)) {
		shls2 <- c(shls2, floor(length(all_tlmks[[k]])/2))
	}

	## get vectors of distances
	for (k in 1:length(all_tlmks)) {
		vdist <- c()
		for (j in 1: l_tlmks[k]) {
			if (j < shls2[k]) {
				assign(paste("all_svdist",k,sep="_"), vdist <- c(vdist, abs(all_tlmks[[k]][j]-all_tlmks[[k]][j+shls[k]])))
			}
		}
	}
	rm(vdist)
	all_vdist <- sapply(mixedsort(ls(pat="all_svdist_")), get, simplify = FALSE)
	names(all_vdist) <- landmarks

	## Get Shortest half
	md <- sapply(all_vdist, function (a) which.min(a))
	lowerb <- as.vector(md)
	upperb <- lowerb + shls
	
	## Gest Shortest half vector from raw data
	all_shvs <- c()
	for (k in 1:length(all_tlmks)) {
		all_shvs <- c(all_shvs, list(all_tlmks[[k]][lowerb[k]:upperb[k]]))
	}
	names(all_shvs) <- landmarks
	
	## Round tlmks for mode calculations
	fun <- function(k) round(k, digits = 0)
	r_tlmks <- lapply(all_tlmks, fun)
	r_shvs <- lapply(all_shvs,fun)
	
	## Get Summary Data
	sumdata <- round(rbind(sapply(all_tlmks, length), sapply(all_tlmks, min), sapply(all_tlmks, max), sapply(all_tlmks, function(a) ((max(a)-min(a))/2)+min(a)),sapply(all_tlmks, mean), sapply(all_tlmks, sd), sapply(all_tlmks, median), sapply(all_tlmks, mad, const=1), sapply(r_tlmks, function (d) as.numeric(names(sort(-table(d)))[1])), sapply(all_tlmks, skewness), sapply(all_tlmks, kurtosis), shls, sapply(all_shvs, function (b) min(b)), sapply(all_shvs, function(c) max(c)), sapply(all_shvs, function(a) ((max(a)-min(a))/2)+min(a)),sapply(all_shvs, mean), sapply(all_shvs, sd), sapply(all_shvs, median), sapply(all_shvs, mad, const=1), sapply(r_shvs, function (d) as.numeric(names(sort(-table(d)))[1])), sapply(all_shvs, skewness), sapply(all_shvs, kurtosis)), digits=1)
	rownames(sumdata) <- c("n", "min", "max", "half_range", "mean", "sd", "median","MAD", "mode", "skewness", "kurtosis", "SH_n", "SH_min", "SH_max", "SH_half_range", "Shorth (SH_mean)", "SH_sd", "SH_median","SH_MAD", "SH_mode", "SH_skewness", "SH_kurtosis")
	colnames(sumdata) <- landmarks

	## Difference between mean and midrange of SH
	distance <- sumdata[5,]-sumdata[15,]

	## Get 75 quantile for data between the SHORTH and MAX
	quant75 <- c()
	for (i in 1:length(all_tlmks)) {
		quant75 <- c(quant75, quantile(subset(all_tlmks[[i]], all_tlmks[[i]]>=sumdata[16,i]), 0.75))
	}
	names(quant75) <- landmarks
	Shormax <-round(quant75, digits=1)
	sumdata_final<-rbind(sumdata,Shormax)
	colnames(sumdata_final)<-landmarks 
	write.csv(sumdata_final, file = paste(directory,"/pooled_summaryStats.csv", sep="")) ## save out data to home directory @ one decimal place
	print("saved pooled summary statistics")

	## Get Boxplot stats (wisker data etc.)
	for (i in 1:length(landmarks)) {
		assign(paste("bpstats",i,sep="_"), boxplot.stats(all_tlmks[[i]], coef=1.5))
	}

	## PDF of Final Histograms
	print("starting pooled combo histobox plot analysis...")
	dev.new()
	pdf(file="FSTD_histograms.pdf") 
	par(mfcol=c(4,2))
	for (i in 1:length(landmarks)) {
		xmax <- max(sort(td3[,i+start_1]))+2
		average <- sumdata_final[5,i]
		shorth <- sumdata_final[16,i]
		shormax <- sumdata_final[23,i]
		shmin <- sumdata_final[13,i]
		shmax <- sumdata_final[14,i]
		boxplot(td3[[i+start_1]], horizontal=TRUE, axes=FALSE, ylim=c(0,xmax))
		segments(shmin, 0.55, shmax, 0.55, lwd=1.8, col="gray30") ## SH height
		segments(shmin, 0.65, shmin, 0.55, lwd=1.8, col="gray30") ## SH bottom tickmark
		segments(shmax, 0.65, shmax, 0.55, lwd=1.8, col="gray30") ## SH top tickmark
		mtext(landmarks[i], side=3, line=-0.1) 
		points(shorth,0.55, pch=19) ## Shorth
		points(shormax,0.55, pch=19, col="darkgrey") ## Shormax
		points(average,1,pch=4) ## mean
		hist(td3[[i+start_1]],col="darkgrey", main=NULL, xlab="mm", xlim=c(0,xmax))
	}
	dev.off()
	graphics.off()

	## JPEGs of Final Histograms
	for (i in 1:length(landmarks)) {
		dev.new()
		jpeg(filename = paste("Final_Histogram_",landmarks[i],".jpg", sep=""), width = 20, height = 15, units = "cm", res = 600) 
		xmax <- max(sort(td3[,i+start_1]))+2
		average <- sumdata_final[5,i]
		shorth <- sumdata_final[16,i]
		shormax <- sumdata_final[23,i]
		shmin <- sumdata_final[13,i]
		shmax <- sumdata_final[14,i]
		par(fig=c(0,0.8,0,0.8), new=TRUE)
		hist(td3[[i+start_1]],col="darkgrey", main=NULL, xlab="mm", xlim=c(0,xmax))
		par(fig=c(0,0.8,0.51,0.9), new=TRUE)
		boxplot(td3[[i+start_1]], horizontal=TRUE, axes=FALSE, ylim=c(0,xmax))
		segments(shmin, 0.55, shmax, 0.55, lwd=1.8, col="gray30") ## SH height
		segments(shmin, 0.65, shmin, 0.55, lwd=1.8, col="gray30") ## SH bottom tickmark
		segments(shmax, 0.65, shmax, 0.55, lwd=1.8, col="gray30") ## SH top tickmark
		mtext(landmarks[i], side=3, line=-0.1) 
		points(shorth,0.55, pch=19) ## Shorth
		points(shormax,0.55, pch=19, col="darkgrey") ## Shormax
		points(average,1,pch=4) ## mean
		dev.off()
		graphics.off()
	}
}

print(paste("saved ",landmark_number," combo histobox plots of pooled data",sep=""))

## Move files to directories
shell(cmd=paste("mkdir ",directory,"/FSTD_study_boxplots/",sep=""), translate = TRUE)
shell(cmd=paste("mkdir ",directory,"/FSTD_violinplots/",sep=""), translate = TRUE)
shell(cmd=paste("mkdir ",directory,"/FSTD_sex/",sep=""), translate = TRUE)
shell(cmd=paste("mkdir ",directory,"/FSTD_age/",sep=""), translate = TRUE)
shell(cmd=paste("mkdir ",directory,"/FSTD_histograms/",sep=""), translate = TRUE)
shell(cmd=paste("move ",directory,"/Boxplot*.* ",directory,"/FSTD_study_boxplots/",sep=""), translate = TRUE)
shell(cmd=paste("move ",directory,"/Vioplot*.* ",directory,"/FSTD_violinplots/",sep=""), translate = TRUE)
shell(cmd=paste("move ",directory,"/Sex*.* ",directory,"/FSTD_sex/",sep=""), translate = TRUE)
shell(cmd=paste("move ",directory,"/Age*.* ",directory,"/FSTD_age/",sep=""), translate = TRUE)
shell(cmd=paste("move ",directory,"/Final_Histogram*.* ",directory,"/FSTD_histograms/",sep=""), translate = TRUE)
rm(list = ls())
print("file clean-up and organization complete")
 
## Update
dlg <- tktoplevel()
tkwm.title(dlg,"TDStats")
labelText <- tclVar("TDStats has generated a comprehensive overview of the data that you loaded. \n\n Your output files have been successfully saved to your chosen directory. \n\nEnjoy your results and keep an eye out for new updates to TDStats @ CRANIOFACIALidentification.com! \n")
label1 <- tklabel(dlg,text=tclvalue(labelText))
tkconfigure(label1,textvariable=labelText)
tkgrid(label1)
Cont.but <- tkbutton(dlg,text="Close TDStats",command=function()tkdestroy(dlg))
tkgrid(Cont.but)
tkfocus(dlg)
tkbind(dlg, "<Destroy>", function() {tkgrab.release(dlg)})
tkwait.window(dlg)
rm(list = ls())
print("TDStats analysis complete")
## End of TDStats