#! /usr/bin/env python

#WARNING: ONLY USE ON DOS_Line FILES FROM BETA ELECTRONS
#KNOWN BUG
#
#
#Numb_Fragments=5 seems to SOMETIMES break the error check; the cause is unknown (maybe it happens when the
#number of fragments is <5). You MAY need to disable the error check in that case to make everything run nicely.
#
#ALSO, sometimes the output of multiwfn isn't properly delimited (e.g. if OpDoS is -(10^4)) and that breaks the script.
#In that case it was necessary to edit the DoS_Line file manually and run this script manually.
#Remove_Stars_And_Nondelims should now sort this bug out (not exhaustively verified).
#KNOWN BUG
#Formats a multiwfn output file, with the help of an input file in the following form:

#History:
#Sept 2016: Finished fixing bug where it got confused when multiwfn output wasnt properly delimited (see Remove_Stars_And_Nondelims)



#File=SCN_B3LYP_631gdp_gd3bj_BMIM_SCN_SMD_abr.fchk
#Numb_Fragments=3
#1a=1-3
#1t=a
#1l=TDOS
#2a=3,2
#2t=s,p
#2l=S3s
#3a=3
#3t=p+

#The script Gen_Multiwfn_Input_Comms uses 1a/1t (and other numbers) while this script mainly needs
#the l value (l is for label) to label the fragments in the output file

#This script formats the output of multiwfn (the line plot) into a form that matlab should like
#(name of matlab script pending). It also carries out a check to see if the sum of the fragments 2 to n
#matches the value of fragment 1. If it does NOT match, a message is output to the console and at the top of
#the output file (making the file unreadable for matlab, therefore requiring manual removal of the message
#if you still want to continue).

#Input Variables (passed as command line arguments in this order):
#1) Line plot data from multiwfn (multiwfn default name - DOS_line)
#2) Original Input_File used to create multiwfn commands (with Gen_Multiwfn_Input_Comms script)
#3) string saying "AllOrbs" (case insensitive) if you want all orbitals (occupied and unoccupied) in 1 file
#(default is one file for each)
#4) string saying "NoError" (case insensitive) if you don't want any error-checking
#NOTE ALSO: the relevant fchk file needs to be in the same folder
#Output
#Either:
#1) File containing all orbitals in format for matlab script ("AllOrbs" option passed as third argument)
#2) One file with occupied orbitals and one with unoccupied orbitals (both in matlab friendly format)

import sys
import numpy as np
import re

def Import_Comm_File(Fname):
    """Read the command input file and return it as a list of lines.

    All tabs, form feeds, vertical tabs, spaces and carriage returns are
    removed from the text before it is split on newlines, so every entry
    looks like ``key=value`` with no embedded whitespace.

    Blank lines are NOT removed: callers index into the returned list by
    position, so the line positions of the file are preserved.

    Fname -- path of the input file to read.
    Returns a list of strings, one per line of the file.
    """
    # "with" guarantees the handle is closed even if read() raises.
    with open(Fname, "r") as fobject:
        String = fobject.read()
    # "\r" is stripped as well so files with Windows line endings do not
    # leave a stray carriage return at the end of every entry (it would
    # otherwise end up inside the labels written to the output header).
    String = re.sub("[\t\f\v\r ]", "", String)
    return String.split('\n')

def Import_Fchk_File(Fname):
    """Return the entire contents of the file *Fname* as one string.

    The text is returned unmodified (blank lines and whitespace are kept).
    """
    # "with" guarantees the handle is closed even if read() raises.
    with open(Fname, "r") as fobject:
        return fobject.read()



def Get_Numb_Occ_Orbitals(Fchk_File):
    """Return the number of occupied (beta) orbitals listed in an fchk file.

    Fchk_File -- the full TEXT of the fchk file (not its file name).

    The value is the last whitespace-separated token on the line that
    starts at the text "Number of beta electrons". Only the beta electron
    count is used; the alpha count is not read by this version of the
    script.

    Raises ValueError if the marker line is missing or if its last token
    is not an integer.
    """
    Marker = "Number of beta electrons"
    Line_Start = Fchk_File.find(Marker)
    if Line_Start == -1:
        raise ValueError("'" + Marker + "' not found in fchk file contents")

    # Read to the end of the marker's own line rather than relying on a
    # specific line following it.
    Line_End = Fchk_File.find("\n", Line_Start)
    if Line_End == -1:
        Line_End = len(Fchk_File)

    # split() with no argument also discards a trailing "\r".
    Electron_Line_List = Fchk_File[Line_Start:Line_End].split()
    Number_Beta_Electrons = int(Electron_Line_List[-1])
    return Number_Beta_Electrons

def Format_Data(Multiwfn_Output, Number_OMOs, Orbital_String, Numb_Frags):
    """Reduce the raw line-plot array to one row per orbital, in eV.

    The input is read in groups of three rows; only the middle row of each
    group (row indices 1, 4, 7, ...) is kept. A trailing group of fewer
    than three rows is ignored. From the kept rows, columns 1 and 2 are
    deleted and only column 0 plus the next Numb_Frags columns are kept.
    Column 0 is finally multiplied by 27.212 (au -> eV).

    Multiwfn_Output -- 2-D numeric array (as loaded by np.loadtxt); it is
                       not modified.
    Number_OMOs     -- number of leading orbitals treated as occupied
                       (ignored when Orbital_String is "ALLORBS").
    Orbital_String  -- "ALLORBS" puts every orbital in the first result;
                       anything else splits occupied / unoccupied.
    Numb_Frags      -- number of fragment columns to keep after column 0.

    Returns (Formatted_OMOs, Formatted_UMOs). With "ALLORBS" the second
    item is the placeholder list [''].

    Raises ValueError if Number_OMOs exceeds the number of orbitals found.
    """
    # NOTE(review): the CODATA Hartree energy is ~27.2114 eV; 27.212 is
    # kept unchanged so output stays identical to earlier runs -- confirm
    # before changing.
    Au_To_eV = 27.212

    Raw = np.asarray(Multiwfn_Output)
    # Floor division keeps the count an integer on Python 2 and Python 3.
    Numb_Orbitals = Raw.shape[0] // 3
    # astype() returns a float copy, so the in-place scaling below never
    # touches the caller's array.
    Middle_Rows = Raw[1:3 * Numb_Orbitals:3].astype(float)

    def _Keep_Fragment_Columns(Rows):
        # Drop columns 1 and 2, then everything past the last fragment.
        Trimmed = np.delete(Rows, [1, 2], axis=1)
        Trimmed = Trimmed[:, :Numb_Frags + 1]
        Trimmed[:, 0] *= Au_To_eV
        return Trimmed

    if Orbital_String == "ALLORBS":
        return _Keep_Fragment_Columns(Middle_Rows), ['']

    if Number_OMOs > Numb_Orbitals:
        raise ValueError("Number_OMOs (" + str(Number_OMOs) + ") exceeds the "
                         + str(Numb_Orbitals) + " orbitals in the multiwfn output")

    Formatted_OMOs = _Keep_Fragment_Columns(Middle_Rows[:Number_OMOs])
    Formatted_UMOs = _Keep_Fragment_Columns(Middle_Rows[Number_OMOs:])
    return Formatted_OMOs, Formatted_UMOs


def Generate_Header(Instructions, Numb_Frags):
    """Build the comma-separated column header for the output file.

    Instructions -- original input file as a list of lines.
    Numb_Frags   -- number of fragments to label.

    The label of fragment k is read from list position 3*k + 1, with the
    text "<k>l=" removed and the remainder upper-cased (so later scripts
    need not be case-sensitive). The header always starts with
    "Energy (eV)".
    """
    columns = ["Energy (eV)"]
    for fragment in range(1, Numb_Frags + 1):
        label_line = Instructions[3 * fragment + 1]
        label = re.sub(str(fragment) + "l=", "", label_line)
        columns.append(label.upper())
    return ",".join(columns)

def Error_Check(OMOs_Matrix, Fchk_File):
    """Check, row by row, that columns 2 onward sum to column 1.

    OMOs_Matrix -- 2-D array; column 1 is compared against the sum of all
                   columns from index 2 onward.
    Fchk_File   -- fchk file name, used only in the error message.

    Returns "" when every row agrees within the tolerance, otherwise a
    message naming the FIRST failing row (1-based). The message is also
    printed to the console.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("ERROR CHECK HAS BEEN CALLED")
    # Must be > 0 to allow for rounding / floating point errors.
    Tolerance = 0.001

    Fragment_Sums = np.sum(OMOs_Matrix[:, 2:], axis=1)
    Errors = np.abs(Fragment_Sums - OMOs_Matrix[:, 1])
    Bad_Rows = np.nonzero(Errors > Tolerance)[0]
    if Bad_Rows.size == 0:
        return ""

    First_Bad_Row = int(Bad_Rows[0]) + 1
    Error_Message = ("Fragment Sum incorrect on row " + str(First_Bad_Row)
                     + " for fchk file " + Fchk_File + "\n")
    print(Error_Message)
    return Error_Message

def M_Factor(Instructions):
    """Return the M_FACTOR value given in the input file lines.

    Each line is upper-cased and the first one containing "M_FACTOR" is
    used: the text "M_FACTOR=" is removed and the remainder converted to a
    float. If no line mentions it, the integer 1 is returned.
    """
    for raw_line in Instructions:
        upper_line = raw_line.upper()
        if "M_FACTOR" in upper_line:
            return float(re.sub("M_FACTOR=", "", upper_line))
    return 1

def Remove_Stars_And_Nondelims(L_Plot_File):
    """Clean the multiwfn line-plot file IN PLACE so np.loadtxt can read it.

    Two repairs are applied to every line:
      * a run of twelve '*' characters is replaced by "    0.000000";
      * wherever six zeros are immediately followed by '-' or a digit 1-9
        (two columns that were written without a delimiter), the last zero
        is replaced by a space, e.g. "1.000000-5.0" -> "1.00000 -5.0".

    The first ten characters of the file are printed to the console.

    L_Plot_File -- path of the file to rewrite.
    Returns the ORIGINAL (uncleaned) file contents, as before.
    """
    with open(L_Plot_File, "r") as fobject:
        String = fobject.read()
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(String[0:10])

    Cleaned_Lines = []
    for line in String.split("\n"):
        line = re.sub(r"\*{12}", "    0.000000", line)
        # One pattern covers the '-' case and all of the digits 1-9.
        line = re.sub(r"000000([-1-9])", r"00000 \1", line)
        Cleaned_Lines.append(line)

    # Joining (instead of appending "\n" to every piece) keeps the file's
    # line endings as they were, so repeated runs no longer add a blank
    # line at the end of the file each time.
    with open(L_Plot_File, "w") as fobject:
        fobject.write("\n".join(Cleaned_Lines))
    return String
	


def Main():
    """Command-line entry point: format a multiwfn line-plot file.

    Arguments are read from sys.argv:
      1) line-plot file written by multiwfn (cleaned in place first);
      2) original input file used to generate the multiwfn commands;
      3) optional "AllOrbs" (case insensitive): write every orbital to one
         file instead of separate occupied / unoccupied files;
      4) optional "NoError" (case insensitive): skip the fragment-sum check.

    Writes "<plot>_MLinpt.txt" and, unless "AllOrbs" was given, also
    "<plot>_Formatted_UMOs_beta.txt". Exits with a usage message when the
    two required arguments are missing.
    """
    if len(sys.argv) < 3:
        sys.exit("Usage: " + sys.argv[0]
                 + " <DOS_line file> <input file> [AllOrbs] [NoError]")

    # Variables that control how the script runs
    L_Plot_File = sys.argv[1]
    Orig_Input_File = sys.argv[2]
    Orbital_String = sys.argv[3].upper() if len(sys.argv) > 3 else ''
    Error_String = sys.argv[4].upper() if len(sys.argv) > 4 else ''

    # Mainly importing data in
    Instructions = Import_Comm_File(Orig_Input_File)  # original input file (string list)
    Fchk_File = re.sub("\r", "", Instructions[0])     # fchk file to get number of OMOs from
    # The documented input format writes this line as "File=<name>";
    # strip that prefix when it is present so the bare file name is opened.
    if Fchk_File.startswith("File="):
        Fchk_File = Fchk_File[len("File="):]
    Fchk_Contents = Import_Fchk_File(Fchk_File)
    Remove_Stars_And_Nondelims(L_Plot_File)    # repairs *** values / undelimited columns in place
    Multiwfn_Output = np.loadtxt(L_Plot_File)  # all the data in one array
    Numb_Frags = int(re.sub("Numb_Fragments=", "", Instructions[1]))

    # Generating the output file names
    def _Output_Name(Suffix):
        # Names containing ".txt" are handled exactly as before. A name
        # without ".txt" used to be left unchanged, which made the output
        # overwrite the input file; the suffix is appended instead.
        if ".txt" in L_Plot_File:
            return L_Plot_File.replace(".txt", Suffix)
        return L_Plot_File + Suffix

    OMOs_FileName = _Output_Name("_MLinpt.txt")
    UMOs_FileName = _Output_Name("_Formatted_UMOs_beta.txt")

    # Formatting data + labelling columns (Generate_Header)
    N_OMOs = Get_Numb_Occ_Orbitals(Fchk_Contents)  # number of occupied orbitals
    Formatted_OMOs, Formatted_UMOs = Format_Data(Multiwfn_Output, N_OMOs, Orbital_String, Numb_Frags)
    File_Header = "M_FACTOR=" + str(M_Factor(Instructions)) + "\n"
    File_Header = File_Header + Generate_Header(Instructions, Numb_Frags)
    if Error_String != "NOERROR":
        # Error_Check returns "" when the file is fine, otherwise an error
        # message that is placed at the very top of the header.
        File_Header = Error_Check(Formatted_OMOs, Fchk_File) + File_Header

    np.savetxt(OMOs_FileName, Formatted_OMOs, delimiter=',', header=File_Header, fmt='%.6f')
    if Orbital_String != "ALLORBS":
        np.savetxt(UMOs_FileName, Formatted_UMOs, delimiter=',', header=File_Header, fmt='%.8f')

# Run only when executed as a script, so importing this module (e.g. to reuse
# one of its functions) does not start processing sys.argv.
if __name__ == "__main__":
    Main()





