#!/bin/bash

#Basic overview: the script takes a set of input files (every file in the current folder with the
#extension set in com_fext, e.g. .txt; format explained in Gen_Multiwfn_Input_Comms.py), runs each input
#file on each .fchk in the current folder, and creates a set of input files for the Gen_Gelius_Spectrum.m
#matlab script. It writes out a list of pathnames to all input files created, in a matlab-friendly format.
#It also creates a folder structure in which the fragments for each fchk are stored in their own folder
#(ESSENTIAL for the matlab script, which assumes all ML_input files in one folder belong to the same system).
#NOTE: LINKED FRAGMENTS MUST HAVE THE SAME ORBITAL ENERGIES (i.e. be from the same log file)
#BUG: DON'T NAME FRAGMENTS after something that's in your .fchk filename (it could break the linked_files
#feature, which greps for a fragment name in all the other fragments)

#IMPORTANT GLOBAL CONSTANTS HERE
#(the rest of the script only reads these values, it never reassigns them)

#folder that holds the installed Multiwfn executable, its DLLs and settings.ini
multiwfn_folder='C:/Users/Richard/Documents/Multiwfn_Installed'

#folder the script is launched from; all input/output files live here
curr_folder="$(pwd)"

#extension that signifies an input file (.txt is fine, as long as there is no extra .txt in the current folder)
com_fext='.test'

#set to "ALLORBS" if you want occupied/unoccupied orbital data in 1 file (rather than 2),
#used by Format_Multiwfn_Output.py
all_orbs='cheese'

#set to "NOERROR" if you want to disable the error checking in Format_Multiwfn_Output.py (NOT RECOMMENDED)
error='cheese'

#set to 1 if you use the link file feature in input files, set to anything else otherwise
#(NOTE: =1 SLOWS DOWN THE CODE). STILL, probably safest to ALWAYS set to 1
linked_files=1

#set to 1 if working with cygwin (the pathnames need some extra formatting to get into matlab format)
cygwin=1
#echo $curr_folder


#First part of script copies Multiwfn into the current folder and turns on silent mode
#(allows it to run without producing any pop-up windows. ESSENTIAL)
#POSSIBLE ERROR:make sure multiwfn is not in your system's path,otherwise script might use the wrong one

#Rewrites the first line that mentions "isilent" in a settings file to "  isilent= 1".
#Arguments: $1 - settings file to edit in place (defaults to settings.ini)
#Returns:   0 when the line was rewritten, 1 when no "isilent" line was found
enable_silent_mode() {
  local settings_file=${1:-settings.ini}
  local lnumber
  #Take everything before the first ':' of grep -n output as the line number.
  #(Cutting the first two characters only works for two-digit line numbers.)
  lnumber=$(grep -n -m 1 'isilent' "$settings_file" | cut -d: -f1)
  if [ -z "$lnumber" ]; then
    #Without an address sed would rewrite EVERY line of the file, so stop here instead.
    echo "WARNING: no isilent entry found in $settings_file, silent mode NOT enabled" >&2
    return 1
  fi
  sed -i "${lnumber}s/.*/  isilent= 1/" "$settings_file"
}

for multiwfn_part in Multiwfn.exe disdll_d.dll libiomp5md.dll settings.ini; do
  cp "$multiwfn_folder/$multiwfn_part" "$curr_folder"
done
enable_silent_mode settings.ini


#echo ${inp_files[@]}
#echo ${fchk_files[@]}

#Works out how many files are linked,to initialise linkage array.REDUNDANT. but i kinda like the printout.
#numb_linked=0
#for file in $inp_files
#do linked=$(grep -i 'Link' $file | wc -l )
#   numb_linked=$((numb_linked+linked))
#done
#echo $numb_linked

#Checks whether a value occurs in a list of words.
#Arguments: every argument except the last is a list element; the LAST argument
#           is the value that is searched for.
#Outputs:   "y" on stdout when the value is in the list, "n" otherwise
#Returns:   0 when the value is in the list, 1 otherwise
function contains() {
    local n=$#
    local value=${!n}
    local i    #keep the counter local so a direct call cannot clobber a caller's $i
    for ((i = 1; i < n; i++)); do
        #${!i} is the i-th positional parameter (indirect expansion)
        if [ "${!i}" == "${value}" ]; then
            echo "y"
            return 0
        fi
    done
    echo "n"
    return 1
}






#FINDING ALL RELEVANT FILES IN FOLDER

#Prints, one per line, every regular file in the current folder whose name ENDS with the given suffix.
#The suffix is compared literally (it is not a regex or a glob), so ".test" no longer matches names
#such as "latest.log", and ".fchk" no longer matches "mol.fchk.bak".
#Arguments: $1 - literal suffix, e.g. ".fchk"
list_files_with_suffix() {
  local suffix=$1
  local entry
  for entry in *; do
    #the quoted "$suffix" inside [[ ]] is matched literally; -f skips directories
    if [[ -f $entry && $entry == *"$suffix" ]]; then
      printf '%s\n' "$entry"
    fi
  done
}

fchk_files=$(list_files_with_suffix '.fchk')   #list of .fchk files in current folder
inp_files=$(list_files_with_suffix "$com_fext") #list of input files in current folder

#array of input file names WITHOUT the extension.
#$inp_files is split on whitespace on purpose: the rest of the script iterates over it the same way.
#The first occurrence of the extension is removed, matching how later code strips it again.
file_list=()
for inp_name in $inp_files; do
  file_list+=("${inp_name/"$com_fext"}")
done
size_filelist=${#file_list[@]}
echo size_filelist equals $size_filelist






#Creates the file-linking array. file_list is just a list of the input files,1 per element.
#For file_list[k], children_list[k] contains either '' (no children files) or a space-separated
#list of children files.
#e.g if file 3(F3) and F4 have F1 as a parent then children_list[0]="F3 F4".
#In this example the outputs for F1/F3/F4 will be merged into a fragment with the name F1.
#NOTE: children_list is a string ARRAY. NOT an array of lists (which aren't supported by bash)

#Fills the global children_list from the globals file_list and com_fext.
#File j is recorded as a child of file i when EXACTLY ONE line of j's input file mentions
#the name of file i (case-insensitive).
build_children_list() {
  local count=${#file_list[@]}
  local parent_idx child_idx matches kids
  children_list=()    #start from a clean array so stale entries cannot survive
  for ((parent_idx = 0; parent_idx < count; parent_idx++)); do
    kids=""
    for ((child_idx = 0; child_idx < count; child_idx++)); do
      #a file can never be its own child (it would otherwise be merged into itself and deleted later)
      if [ "$child_idx" -eq "$parent_idx" ]; then
        continue
      fi
      #-F: the fragment name is a literal string, not a regex; -c counts the matching lines
      matches=$(grep -c -i -F -- "${file_list[parent_idx]}" "${file_list[child_idx]}${com_fext}")
      if [ "${matches:-0}" -eq 1 ]; then
        kids="${kids:+$kids }${file_list[child_idx]}"
      fi
    done
    children_list[parent_idx]=$kids
  done
}

if [ "$linked_files" -eq 1 ]
then
  build_children_list

  for ((i = 0; i < size_filelist; i++))
  do
    echo row $i is equal to ${children_list[i]}
  done
fi

#Creating an array of all parent files (all_parents), so i know which files to mv to
#output directories later

#Fills the global all_parents with every entry of file_list that does NOT appear
#anywhere in children_list (i.e. every file that is not somebody's child).
find_parent_files() {
  local IFS=$' \t\n'
  local -a all_children=()
  local candidate
  #children_list holds space-separated names; flatten it into one word per child
  read -r -a all_children <<< "${children_list[*]}"
  all_parents=()
  for candidate in "${file_list[@]}"; do
    if [ "$(contains "${all_children[@]}" "$candidate")" == "n" ]; then  # making sure file is not in children list
      all_parents+=("$candidate")
    fi
  done
}

if [ "$linked_files" -eq 1 ]
then
  find_parent_files
else
  #linking disabled: every input file is its own parent
  all_parents=("${file_list[@]}")
fi

#echo the value of all_parents[0] is ${all_parents[0]}
#echo the value of all_parents[1] is ${all_parents[1]}
#echo the value of all_parents[2] is ${all_parents[2]}
#echo the value of all_parents[3] is ${all_parents[3]}
#read -p "Press [Enter] key to continue"




#THIS IS WHERE THE BIG FOR LOOP STARTS

#Escapes the characters that are special in the replacement half of a sed "s" command
#(backslash, "/" and "&") so that a file name can be inserted literally.
#Arguments: $1 - text to escape.  Outputs: the escaped text on stdout.
sed_escape_replacement() {
  printf '%s\n' "$1" | sed 's|[&/\]|\\&|g'
}

#Converts a cygwin drive path (/cygdrive/<letter>/rest) to <LETTER>:/rest.
#Any other path is printed unchanged.
#Arguments: $1 - path to convert.  Outputs: the converted path on stdout.
cygdrive_to_windows() {
  local path=$1
  local cygdrive_re='^/cygdrive/([A-Za-z])(/.*)?$'
  local drive
  if [[ $path =~ $cygdrive_re ]]; then
    drive=$(printf '%s' "${BASH_REMATCH[1]}" | tr '[:lower:]' '[:upper:]')
    path="${drive}:${BASH_REMATCH[2]}"
  fi
  printf '%s\n' "$path"
}

pname="{"	#will hold the entire path list of all new matlab input files (for Gen_Gelius_Spectrum.m)

#$fchk_files and $inp_files are whitespace-separated lists, so they are deliberately left
#unquoted wherever they are iterated over. $fchk_f is the fchk file we're currently working on.
for fchk_f in $fchk_files; do		#BIG LOOP over all fchk files

  #Replace top line of each input file with the current fchk file
  fchk_escaped=$(sed_escape_replacement "$fchk_f")
  for input in $inp_files; do
    sed -i "1s/.*/${fchk_escaped}/" "$input"
  done

  #Now we generate the multiwfn commands for each file (_MCRF.txt extension) and run multiwfn on them
  #NOTE:separate loop for the format_multiwfn output because multiwfn prints so much to screen
  #and i want to see any error messages from the format_output script
  for file in $inp_files; do
    if [ "$com_fext" != ".txt" ]; then	#.txt required by Gen_Multiwfn_Input_Comms (and maybe others)
      txt_copy=${file/"$com_fext"/.txt}
      cp "$file" "$txt_copy"
      file=$txt_copy
    fi
    Gen_Multiwfn_Input_Comms.py "$file"
    rfile=${file/.txt/_MCRF.txt}
    ./Multiwfn <"$rfile"
    #renaming all the output files
    mv DOS_line.txt "${file/.txt/_DOS_line.txt}"
    mv DOSfrag.txt "${file/.txt/_DOSfrag.txt}"
    mv DOS_curve.txt "${file/.txt/_DOS_curve.txt}"
  done

  echo THE LIST OF inp_files is now $inp_files

  #This loop formats the multiwfn output to generate an output for matlab
  #NOTE: the .txt copies made above for non-.txt inputs are left in the working folder
  #(the clean-up step that removed them was disabled by the original author).
  for file in $inp_files; do
    if [ "$com_fext" != ".txt" ]; then	#.txt required by Gen_Multiwfn_Input_Comms (and maybe others)
      file=${file/"$com_fext"/.txt}
    fi
    Format_Multiwfn_Output.py "${file/.txt/_DOS_line.txt}" "$file" "$all_orbs" "$error"
    mv "${file/.txt/_DOS_line_MLinpt.txt}" "${file/.txt/_MLinpt.txt}"
  done

  #Merging relevant fragments together (needs all MLinpt files generated first, hence the separate loop)
  #and deleting the children fragments afterwards
  if [ "$linked_files" -eq 1 ]; then
    for ((i = 0; i < size_filelist; i++)); do
      if [ -z "${children_list[i]}" ]; then
        continue
      fi
      merged_file="${file_list[i]}_MLinpt.txt"	#output fragment file after children are merged w/parent
      for linked in ${children_list[i]}; do	#space-separated list, split on purpose
        Merge_Fragment_Files.py "$merged_file" "${linked}_MLinpt.txt"
        echo WARNING Files have been merged, make sure HOMO is still in merged output files
        rm -- "${linked}_MLinpt.txt"
        rm -- "${linked}_DOS_curve.txt"
        rm -- "${linked}_DOS_line.txt"
        rm -- "${linked}_MCRF.txt"
        rm -- "${linked}_DOSfrag.txt"
        if [ "$all_orbs" != "ALLORBS" ]; then
          rm -- "${linked}_DOS_line_Formatted_UMOs.txt"
        fi
      done
    done
  fi

  #Finally creating directory for the files and doing some cleaning up
  #Moving all output files to correct directories
  curr_dir=${fchk_f/.fchk/}	#directory where all current output (plus cp of input file) is stored
  if [ ! -d "$curr_dir" ]; then
    mkdir "$curr_dir"
  fi

  for file in $inp_files; do
    cp "$file" "$curr_dir"
    file=${file/"$com_fext"/}	#remove extension,makes rest easier to code
    if [ "$(contains "${all_parents[@]}" "$file")" == "y" ]; then	#only move files from parent list
      mv "${file}_MLinpt.txt" "$curr_dir"
      mv "${file}_DOS_curve.txt" "$curr_dir"
      mv "${file}_DOS_line.txt" "$curr_dir"
      mv "${file}_MCRF.txt" "$curr_dir"
      mv "${file}_DOSfrag.txt" "$curr_dir"
      if [ "$all_orbs" != "ALLORBS" ]; then
        mv "${file}_DOS_line_Formatted_UMOs.txt" "$curr_dir"
      fi
    fi
  done

  #Sorting out the pathname for matlab (needs formatting for windows path)
  out_path="$(pwd)/${curr_dir}"
  if [ "$cygwin" -eq "1" ]; then
    echo cygwin functin got called
    echo "current folder name is ,'${out_path}'"
    out_path=$(cygdrive_to_windows "$out_path")
  fi
  current_folder=",'${out_path}'"
  pname="${pname}${current_folder}"
done	#terminates BIG loop

#pname is now "{" followed by ",'path'" entries: drop the comma right after the "{",
#switch to backslashes (Windows format to help matlab out) and close the brace.
pname="{${pname:2}"
pname=${pname//\//\\}
pname="${pname}"'}'


#Save the finished matlab path list (one line) to the file "pathname_file" in the current folder
printf '%s\n' "$pname" >"pathname_file"





#Last step: remove the Multiwfn executable, DLLs and settings file that were
#copied into the current folder at the start of the script
for copied_part in Multiwfn.exe disdll_d.dll libiomp5md.dll settings.ini; do
  rm "$copied_part"
done


