#! /usr/bin/env python
#Script takes a file in format below, and converts it into a series of commands 
#for multiwfn
#Input format

#SCN_B3LYP_631gdp_gd3bj_BMIM_SCN_SMD_abr.fchk
#Numb_Fragments=3
#1a=1-3
#1t=a
#1l=TDOS
#2a=3,2
#2t=s,p
#2l=S3s
#3a=3
#3t=p+
#ML_Factor=1.0
#Linked_File = xxxxx.txt

#1a lists the atoms for fragment 1
#1t lists the types of basis functions to include; a means all
#1l is the label of the fragment. NOT used by this script, but it needs to be present (as the script steps over it)
#The label should be of the form [Element symbol][shell][type of orbital], e.g. S3p N2p C2s (call 1l whatever you want,
#since this column should always contain the fragment TDOS). If you're calculating DoS (i.e. hv<0 in the Matlab script) then this
#can be whatever you want.
#p means p-type AOs, d means d-type etc.
#For more than 1 type of basis function, comma delimit them
#p+ means p AOs and anything with higher angular momentum as well 
#ML_Factor MUST be present. It lets you multiply the intensities of a fragment in the Matlab script (for when your model system stoichiometry
# is different to experiment, e.g. you want the DoS of 1 ion dissolved in water in a 0.01:0.99 mole fraction ratio but used just 
#1 water molecule and 1 ion - set ML_Factor to 99 for the water fragment and 1 for the ion fragment to account for that, at least
# intensity wise)
#Linked_File is for when you want to treat multiple input files together, e.g. your anion has more than 10 Numb_Fragments so needs two
# input files, but you want them to be merged into 1 fragment for the ML script. Set Linked_File equal to the name of the file 
#you want to be the "parent file", e.g. to link both F2 and F3 to F1 (the name of the F1 file will be the final fragment name) 
#set Linked_File=F1 in both files F2 and F3. IMPORTANT: a file can't be both a parent and a child, e.g. in the example above you cannot
#also link F4 to either F2 or F3 (but you can link it to F1)


#Output:
#When used with multiwfn ("Multiwfn<Output_File") it will generate a DoS
#line/stick plot in current folder. MUST have Multiwfn in silent run mode
#(isilent variable in the settings.ini)

import os
import sys
import string
import re


#Fname ="Test_Input_File.txt"


#Puts whole file contents in a list variable (Formats to get rid of " " etc)
def Import_File(FileName):
	"""Read FileName and return its contents as a list of whitespace-free lines.

	Spaces, tabs, form feeds, vertical tabs and carriage returns are removed
	from the whole file before it is split on newline characters, so
	"1a = 1-3" and "1a=1-3" give the same entry, and files saved with CRLF
	line endings do not leave a stray carriage return on every entry.

	Blank lines are NOT dropped: they come back as empty strings, and a file
	that ends with a newline yields a final empty string.
	"""
	# The context manager closes the file even if read() raises.
	with open(FileName, "r") as fobject:
		contents = fobject.read()
	# Newlines are deliberately left out of the class: they are the separators.
	contents = re.sub(r"[\t\f\v\r ]", "", contents)
	return contents.split("\n")

#Writes the initial part of the output file
#(essentially the commands to get up to entering fragments in Multiwfn)
def Output_Header(Output_FileName,Fchk_File):
	"""Create (or truncate) Output_FileName and write the opening commands.

	Three lines are written: the name of the .fchk file, then "10", then "-1".
	These are the inputs that take Multiwfn as far as its "define fragments"
	step; the fragment definitions are appended to the same file afterwards.
	"""
	# The context manager closes the file even if the write fails.
	with open(Output_FileName, "w") as fobject:
		fobject.write(Fchk_File + "\n" + "10\n" + "-1\n")

#Finds out which type of basis function is present (e.g D+2->D-2 or XX/XY for d) for all
#types of basis functions used.
def Generate_Basis_Types(Fchk_File):
	"""Work out which basis-function labels are present in a .fchk file.

	The integer shell-type codes are read from the lines lying between the
	line containing "Shell type" and the line containing "Number of
	primitives". The codes are interpreted as follows:

	    0  -> s          -1 -> sp (counts as both s and p)      1 -> p
	   -2  -> d, labels D-2..D+2        2 -> d, labels XX,YY,ZZ,XY,XZ,YZ
	   -3  -> f, labels F-3..F+3        3 -> f, labels not known (error)
	   -4  -> g, labels G-4..G+4        4 -> g, labels not known (error)

	Returns the tuple (s_orbs, p_orbs, d_orbs, f_orbs, g_orbs). Each element is
	a list of label strings, or [''] when that angular momentum is absent.

	Raises ValueError if the shell-type section is not found, or if the file
	uses positive-coded f or g shells, whose labels this script does not know.
	"""
	shell_codes = set()
	found_header = False
	# The context manager guarantees the .fchk file is closed again.
	with open(Fchk_File, 'r') as fobject:
		for line in fobject:
			if "Number of primitives" in line:	# Start of the next section: stop reading
				break
			if found_header:
				# Parse whole integers so that each code is matched exactly
				shell_codes.update(int(token) for token in re.findall(r"-?\d+", line))
			elif "Shell type" in line:
				found_header = True	# Codes start on the following line

	if not found_header:
		raise ValueError("Error in Generate_Basis_Types in script Gen_Multiwfn_Input_Comms - Shell types not found")

	# s and p functions (code -1 is a combined sp shell, so it counts for both)
	s_orbs = ['S'] if (0 in shell_codes or -1 in shell_codes) else ['']
	p_orbs = ['X','Y','Z'] if (1 in shell_codes or -1 in shell_codes) else ['']

	# d functions: negative code takes precedence over the positive one
	if -2 in shell_codes:
		d_orbs = ["D-2","D-1","D 0","D+1","D+2"]
	elif 2 in shell_codes:
		d_orbs = ["XX","YY","ZZ","XY","XZ","YZ"]
	else:
		d_orbs = ['']

	# f functions: the labels for code 3 are not known, so refuse to guess
	if -3 in shell_codes:
		f_orbs = ["F-3","F-2","F-1","F 0","F+1","F+2","F+3"]
	elif 3 in shell_codes:
		raise ValueError("Unknown f-orbital basis present in file, please update Generate_Basis_Types of script Gen_Multiwfn_Input_Comms.py")
	else:
		f_orbs = ['']

	# g functions: same situation as for f
	if -4 in shell_codes:
		g_orbs = ["G-4","G-3","G-2","G-1","G 0","G+1","G+2","G+3","G+4"]
	elif 4 in shell_codes:
		raise ValueError("Unknown g-orbital basis present in file, please update Generate_Basis_Types of script Gen_Multiwfn_Input_Comms.py")
	else:
		g_orbs = ['']

	# Anything beyond g is not handled; warn but carry on with what is known
	if any(abs(code) >= 5 for code in shell_codes):
		print("ERROR: higher angular momenta than g-orbitals are not accounted for in Gen_Multiwfn_Input_Comms.py")
	# Both codes of one angular momentum in the same file means two label sets
	# would be needed; only the negative-code labels are returned in that case.
	if any(-l in shell_codes and l in shell_codes for l in (2, 3, 4)):
		print("ERROR: Mixed basis set detected (multipel types of d/f/g orbitals) and not supported in Gen_Multiwfn_Input_Comms.py")

	return s_orbs,p_orbs,d_orbs,f_orbs,g_orbs

#Turns the "+" in the input file into a list of orbital types. Will go up to g-orbs currently (no harm going higher than is actually present)
#POSSIBLE ERROR: if two pluses are found I'm screwed at the moment (...but two pluses would be stupid, so just throw an error message)
#Also throw an error message if a + remains at the end of the function
def Format_Orb_List(Orb_List):
	"""Expand "<shell>+" shorthand in a comma-separated orbital-type string.

	Orb_List is a string (not a list). A shell letter followed by "+" means
	"this shell and everything with higher angular momentum, up to g":

	    "p,+" -> "p,d,f,g"        "s,+" -> "s,p,d,f,g"        "g,+" -> "g"

	The letter and the "+" may be separated by a comma ("p,+") or written
	together ("p+"); both spellings are expanded. The expanded string is
	returned. A message is printed if a "+" is still present afterwards,
	because that "+" was not attached to one of s, p, d, f or g.
	"""
	expansions = (
		("s", "s,p,d,f,g"),
		("p", "p,d,f,g"),
		("d", "d,f,g"),
		("f", "f,g"),
		("g", "g"),
	)
	for shell, replacement in expansions:
		pattern = shell + r",?\+"	# raw string: "\+" is a literal plus sign
		if re.search(pattern, Orb_List) is None:
			continue
		Orb_List = re.sub(pattern, replacement, Orb_List)
		if shell == "g":
			print("NOTE: g orbitals are the highest ang. mom. accounted for by the Gen_Multiwfn_Input Script")

	if "+" in Orb_List:
		print("ERROR: could not interpret '+' in orbital list '" + Orb_List + "' in Format_Orb_List of Gen_Multiwfn_Input_Comms.py")

	return Orb_List

#This turns an atom list and orbital list into a string thats input for Multiwfn
#(assumes we're at the define fragments section of multiwfn and will leave in same place we start)
def Generate_Input(Atom_List,Orb_List,frag_numb,s_orbs,p_orbs,d_orbs,f_orbs,g_orbs):
	"""Build the block of Multiwfn input lines that defines one fragment.

	Atom_List  -- the atom selection text for the fragment (e.g. "1-3")
	Orb_List   -- list of requested orbital types; if its first entry is "a"
	              every basis function on the chosen atoms is selected
	frag_numb  -- fragment number, written as the first line of the block
	s_orbs .. g_orbs -- label lists for each angular momentum; a list whose
	              first entry is '' means that type is absent and is skipped

	Returns a string beginning with the fragment number, followed by one
	"cond / atoms / a / label" group per selected label and ending with "q".
	It is written on the assumption that Multiwfn is sitting at its
	define-fragments step both before and after these lines are consumed.
	"""
	atoms = "".join(Atom_List)
	pieces = [str(frag_numb) + "\n"]

	# "a" = all basis functions: a single condition is enough
	if Orb_List[0] == "a":
		pieces.append("cond\n" + atoms + "\na\na\nq\n")
		return "".join(pieces)

	requested = ','.join(Orb_List)
	shells = (
		("s", s_orbs),
		("p", p_orbs),
		("d", d_orbs),
		("f", f_orbs),
		("g", g_orbs),
	)
	for letter, labels in shells:
		# Skip shells that were not asked for, or that the basis set lacks
		if letter not in requested or labels[0] == '':
			continue
		for label in labels:
			pieces.append("cond\n" + atoms + "\na\n" + label + "\n")

	pieces.append("\nq\n")
	return "".join(pieces)



def Main():
	"""Turn the fragment-definition file named on the command line into a Multiwfn command file.

	The input file is laid out as: line 1 the .fchk file name, line 2
	"Numb_Fragments=N", then three lines per fragment ("<n>a=" atoms,
	"<n>t=" orbital types, "<n>l=" label). The label line is stepped over and
	any lines after the last fragment are not read here.

	The commands are written to the input name with ".txt" replaced by
	"_MCRF.txt". If the input name contains no ".txt", "_MCRF.txt" is appended
	instead, so that the input file is never overwritten by its own output.
	"""
	if len(sys.argv) < 2:
		sys.stderr.write("Usage: " + sys.argv[0] + " <input_file.txt>\n")
		sys.exit(1)
	Fname = sys.argv[1]
	# Drop carriage returns from every line so CRLF input files behave the same
	Instructions = [line.replace("\r", "") for line in Import_File(Fname)]

	Output_File_Name = Fname.replace('.txt','_MCRF.txt') #File generated by this code
	if Output_File_Name == Fname:
		# No ".txt" to replace: writing to this name would destroy the input file
		Output_File_Name = Fname + '_MCRF.txt'

	Fchk_File = Instructions[0] #Name of the fchk file multiwfn will use
	Output_Header(Output_File_Name,Fchk_File) #Writes the initial instructions to the output file.
	Numb_Frags = int(re.sub("Numb_Fragments=","",Instructions[1]))
	s_orbs,p_orbs,d_orbs,f_orbs,g_orbs = Generate_Basis_Types(Fchk_File)

	Full_Frag_Commands = "" #All commands that create the fragments (start and finish where the header leaves off)
	for frag_numb in range(1, Numb_Frags + 1):
		i = 2 + 3 * (frag_numb - 1)	#Index of this fragment's "<n>a=" line
		Atom_List = re.sub(str(frag_numb)+"a=","",Instructions[i])
		Orb_Spec = re.sub(str(frag_numb)+"t=","",Instructions[i+1]) #orbital types for current fragment, still a string
		# Orb_Spec is a string, so this join puts a comma between every
		# CHARACTER ("p+" -> "p,+", "s,p" -> "s,,,p"). Format_Orb_List relies on
		# the "p,+" spelling, and the empty entries produced by the doubled
		# commas are harmless to Generate_Input.
		Orb_String = ','.join(Orb_Spec)
		if "+" in Orb_String:	#A plus sign means the list must be expanded to name every orbital type explicitly
			Orb_String = Format_Orb_List(Orb_String)
		Orb_List = Orb_String.split(",")
		Full_Frag_Commands = Full_Frag_Commands + Generate_Input(Atom_List,Orb_List,frag_numb,s_orbs,p_orbs,d_orbs,f_orbs,g_orbs)

	File_Tail= "e\n0\n2\n-2 0.4 0.2\n0\n3\n "		#Contains the part of the input comms. to create line plot from frags defined
	Full_Commands = Full_Frag_Commands + File_Tail #All Commands other than the header in one string
	Full_Commands = re.sub(r'\n\s*\n+','\n',Full_Commands)	#Collapse blank lines into single newlines
	# Append after the header; the context manager flushes and closes the file
	with open(Output_File_Name,"a") as fobject1:
		fobject1.write(Full_Commands)
# Only run when executed as a script, so the functions above can be imported
# (for reuse or testing) without reading sys.argv or writing any files.
if __name__ == "__main__":
	Main()

#TO DO:
#Secondly i may modify the input file to contain a label for each fragment,if so i'll
#put it below each orbital type for each fragment,so will just have to loop in 3s rather 
#than 2s in the Main() function

