Talk:Mod:Hunt Research Group/sort conformers

python script to sort conformers based on relative energies

copy the code into a script called "sort_conformers.py"
cd into the directory containing the .log files from the frequency jobs to be sorted (so far the script only reads files named *freq*.log)
to execute the script type "python PathToScript/sort_conformers.py"
this will create a file sorted_conformers.txt
sorted_conformers.txt contains a list of all structures found in the current directory. Conformers are sorted by relative energy. The file contains all relevant data (is the calculation converged, are there vibrations with negative frequencies, E, DE, H, DH, G, DG, TS, DTS, ZPC, low frequency modes)

Hint: Depending on your file names, the column alignment in sorted_conformers.txt can differ. However, if you want to use the Python script below to create Excel files for the database from sorted_conformers.txt, don't delete or add tabs/spaces.

#
import os
import glob

'''
###############
#FUNCTIONALITY#
###############

This script is designed to read in all freq files saved in a directory, pull out thermodynamic data, sort the files based on total energies in an ascending way and write all information to an output file called 'sorted_conformers.txt'.
For properties in atomic units: The total energy will be written with 8 decimal places, and all other properties with 6 decimal places.

############
#HOW TO RUN#
############

1. cd into the desired directory
2. enter the following line: python PathToScript/sort_conformers.py

###########
#Important#
###########

This script decides to analyse a file based on its name.
To analyse frequency .log files, their names need to include the string 'freq' and need to end with the string '.log'

'''

# Define a class conformer
# The class attribute 'conf_list' stores created instances. The other class attributes define strings to search for thermodynamic data.
# Each instance has the attributes: name (name), energy (e), imaginary frequencies (imFr), convergence (conv), zero point correction (zpc), enthalpy (h), free energy (g), the difference g - h (st), and the low frequency entries (freq)

class conformer:
    """Thermodynamic data read from one frequency .log file.

    Every instance created is appended to the class-level list
    ``conf_list`` so that all conformers can be sorted and written out
    together afterwards.

    Note: Python does not support constructor overloading. The file used
    to define a second, parameterless ``__init__`` before the one below;
    it was silently replaced by the later definition (and set ``e_tot``
    instead of ``e``), so it has been removed.
    """

    # All instances created so far (shared by the whole class).
    conf_list = []

    # Marker strings searched for in the .log file to locate each quantity.
    e_string = 'SCF Done'
    zpc_string = 'Zero-point correction='
    h_string = 'Sum of electronic and thermal Enthalpies='
    g_string = 'Sum of electronic and thermal Free Energies='
    imFr_string = 'imaginary frequencies (negative Signs)'
    conv_string = 'Stationary point found.'
    freq_string = 'Low frequencies ---'

    def __init__(self, name, e, imFr, conv, zpc, h, g, freq):
        """Store the data of one conformer and register it in ``conf_list``.

        name -- name of the .log file the data was read from
        e    -- total (SCF) energy
        imFr -- True if the imaginary-frequency marker was found
        conv -- True if the stationary-point marker was found
        zpc  -- zero point correction
        h    -- sum of electronic and thermal enthalpies
        g    -- sum of electronic and thermal free energies
        freq -- low frequency entries (sequence of strings)
        """
        self.name = name
        self.e = e
        self.imFr = imFr
        self.conv = conv
        self.zpc = zpc
        self.h = h
        self.g = g
        # Stored as g - h (reported in the 'ST' columns of the output);
        # with G = H - T*S this difference corresponds to -T*S.
        self.st = round(g - h, 6)
        self.freq = freq
        conformer.conf_list.append(self)

# Global variables that temporarily hold the thermodynamic data of the conformer being read, and the
# lowest values found over all files (e_min, h_min, g_min, st_min).
# The minima start at 0, so they only act as true minima when the parsed values are negative.

e = 0
zpc = 0
h = 0
g = 0
e_min = 0
h_min = 0
g_min = 0
st_min = 0
imFr = False
conv = False
low_freqs = []


def value_after_equals(line):
    """Return the first number that follows the first '=' in 'line' as a float.

    The value is located by splitting on '=' instead of slicing fixed columns,
    so the result does not depend on the length of the text in front of the
    '=' (e.g. 'E(RHF)' versus 'E(RB3LYP)' on the 'SCF Done' line).

    Raises IndexError if 'line' has no '=' or nothing after it, and
    ValueError if the token after the '=' is not a number.
    """
    return float(line.split('=', 1)[1].split()[0])


# Create a list ('log_files') containing all files ending with '.log' in the current directory.
# Identify frequency files by the substring 'freq' and add these to a separate list ('freq_files').

log_files = glob.glob('*.log')
freq_files = [log_file for log_file in log_files if 'freq' in log_file]

# Open each file in the current directory which has the substring 'freq' in its name, pull out the thermodynamic data, create an instance of the class 'conformer', and add the new instance to the class attribute list 'conformer.conf_list'

for freq_file in freq_files:
    # Reset all per-file values so that nothing is carried over from the
    # previously read file when a quantity is missing in this one.
    e = 0
    zpc = 0
    h = 0
    g = 0
    imFr = False
    conv = False
    low_freqs = []

    with open(freq_file, 'r') as infile:
        for line in infile:
            # If the marker occurs several times, the last occurrence wins.
            if conformer.e_string in line:
                e = value_after_equals(line)

            if conformer.imFr_string in line:
                imFr = True

            # Only the first 'Low frequencies ---' line of a file is kept.
            if conformer.freq_string in line and not low_freqs:
                low_freqs = line.split(conformer.freq_string, 1)[1].split()

            if conformer.conv_string in line:
                conv = True

            if conformer.zpc_string in line:
                zpc = value_after_equals(line)

            if conformer.h_string in line:
                h = value_after_equals(line)

            if conformer.g_string in line:
                g = value_after_equals(line)

    # Creating the instance also appends it to 'conformer.conf_list'.
    conf = conformer(freq_file, e, imFr, conv, zpc, h, g, low_freqs)

    # Track the minima using the values actually stored for this conformer.
    e_min = min(e_min, e)
    h_min = min(h_min, h)
    g_min = min(g_min, g)
    st_min = min(st_min, conf.st)

# Sort all created instances of the class 'conformer' by their total energies (ascending).

newList = sorted(conformer.conf_list, key=lambda x: x.e)

# Create an output file 'sorted_conformers.txt' containing the names and all thermodynamic data of all created instances of the class 'conformer' in an ascending manner based on total energies.
# The tab layout of every row is kept fixed because the Excel export script splits the lines on tabs.

hartree_to_kj_mol = 2625.5  # conversion factor applied to all relative (D...) columns
n_low_freqs = 6             # number of low frequency columns written per row


def relative_kj_mol(value, reference):
    """Return (value - reference) converted to kJ/mol, rounded to 2 decimals, as a string."""
    return str(round((value - reference) * hartree_to_kj_mol, 2))


def yes_no(flag):
    """Return 'yes' for a true flag and 'no' otherwise."""
    return 'yes' if flag else 'no'


def low_freq_columns(freq):
    """Return exactly 'n_low_freqs' strings for the low frequency columns.

    'freq' may be a sequence of entries or a single whitespace separated
    string. Missing entries are filled with 'n/a' so that every row keeps the
    same number of tab separated fields instead of raising an IndexError.
    """
    entries = freq.split() if isinstance(freq, str) else list(freq)
    entries = [str(entry) for entry in entries[:n_low_freqs]]
    return entries + ['n/a'] * (n_low_freqs - len(entries))


with open('sorted_conformers.txt', 'w') as outData:
    outData.write('file name\t\t\tim fr?\tconv?\tE/hartree\tDE/(kJ/mol)\tH/hartree\tDH/(kJ/mol)\tG/hartree\tDG/(kJ/mol)\tST/hartree\tDST/(kJ/mol)\tZPC/hartree\tlow frequencies\n\n')
    for conf in newList:
        outData.write(
            conf.name[:-4] + '\t'  # drop the '.log' extension
            + yes_no(conf.imFr) + '\t'
            + yes_no(conf.conv) + '\t'
            + str(round(conf.e, 8)) + '\t' + relative_kj_mol(conf.e, e_min) + '\t\t'
            + str(round(conf.h, 6)) + '\t' + relative_kj_mol(conf.h, h_min) + '\t\t'
            + str(round(conf.g, 6)) + '\t' + relative_kj_mol(conf.g, g_min) + '\t\t'
            + str(round(conf.st, 6)) + '\t' + relative_kj_mol(conf.st, st_min) + '\t\t'
            + str(round(conf.zpc, 6)) + '\t'
            + '\t'.join(low_freq_columns(conf.freq)) + '\n'
        )
#

python script to create Excel files for the database from sorted conformers

copy the code into a script called "sorted_conformers_to_template.py"
download the Excel database template and save it somewhere you can find it again
edit the "PathToTemplate" section in line 23 according to your system
cd into the directory containing the "sorted_conformers.txt" file
to execute the script type "python PathToScript/sorted_conformers_to_template.py"
this will create a file sorted_conformers.xlsx
sorted_conformers.xlsx contains a list of all structures saved in "sorted_conformers.txt" in the way needed for the group database
After creating the xlsx file, you need to edit it so that it contains all relevant data (systematic naming, name of the molecule, method, smd parameters...)

Important: The script transfers all data from sorted_conformers.txt, so structures with negative frequencies will be copied across as well! If you want to ignore these you need to delete them manually before running the script.

#
import os
import openpyxl as opx
import re
import glob

'''
###############
#FUNCTIONALITY#
###############

This script is written to transfer data from a 'sorted_conformers.txt' file generated by the 'sort_conformers.py' script to a Windows Excel file

############
#HOW TO RUN#
############

1. cd into the directory containing the 'sorted_conformers.txt' file and the "DB_Temp.xlsx" file
2. run: python PathToScript/sorted_conformers_to_template.py

'''
# Load the Excel template; the data is written into its active worksheet.
# Edit "PathToTemplate" in the next line according to your system.
wb = opx.load_workbook("PathToTemplate/DB_Temp.xlsx")
ws = wb.active

# The header line of the input file corresponds to worksheet row 7 (nothing is
# written for it); the first conformer is written to row 8.
first_data_row = 8

# Field n of an input line goes to worksheet column n (1-based). Column 1 is
# the file name; columns 2 and 3 ('im fr?' and 'conv?') are not transferred;
# columns 4 to 18 hold the energies, the ZPC and the six low frequencies.
copied_columns = [1] + [column for column in range(4, 19)]

# Read the input file and split every line at tabs/newlines. Blank lines (such
# as the one following the header) are skipped wherever they occur.
with open('sorted_conformers.txt', 'r') as inData:
    rows = [re.split('\t+|\n+', line) for line in inData if line.strip()]

# Write data from the input file to the Excel output file; rows[0] is the header.
for y, elem in enumerate(rows[1:], first_data_row):
    for x in copied_columns:
        ws.cell(row=y, column=x).value = elem[x - 1]

# Save the filled template under a new name (the template itself is not overwritten).
wb.save('sorted_conformers.xlsx')
#