classdef NBO_E2_Class_V2 <handle     %Inheriting from handle class allows the methods to modify object properties(lol that this isnt default)
    %NBO_E2_Class_V2 Creates objects for analysing NBO E(2) values from g09
    %.log files
    %   Pass a full file path to an NBO file in order to create the object
    
    
    %History:
    %V1 - Completed sometime in 2016, exact date not recorded
    %Mar 2017 - Added the full_filename,all_charges properties +
    %           get_charges method
    %           fixed bug caused by different number of lines between
    %           "PERTURBATION" and "within unit 1" for lone ions vs ion
    %           pairs (>1 unit means extra line specifying intermolecular
    %           print threshold)
    %V2(July 2017) - Made it work on both windows and linux/mac 
    
    
    properties
        unit_list;  %nx1 cell array, one entry per "Molecular unit" line in the file. Each entry is the regexp 'match' result (itself a cell) holding the bracketed molecular formula of that unit
        frag_charges; %nx1 numeric array; entry i is the value parsed from the i-th "Charge unit" line found in the file (intended to line up with unit_list)
        DA_matrix;  %nxn array of A_D_NBO_E2_Class_V1 objects with info on donor-->acceptor (column = donor unit, row = acceptor unit)
        e_scf;  %Intended to hold the scf energy in atomic units. Never populated by this class: NBO needs to run w/o dispersion correction, hence the energies would not be useful anyway
        base_fname;   % The filename minus extension and path
        full_filename; %The filename passed to the constructor; lets methods re-read the same file
        all_charges;    %OPTIONAL (filled by get_charges): nx3 cell array holding the first three whitespace-separated tokens of each row of the natural population table (presumably element, atom number, natural charge - all kept as text)
        %Intermediate results follow; these probably should not be
        %public properties
        E2_section; %The part of the file containing the E2 analysis (nx1 cell array, 1 row = 1 line)
        search_string_matrix;  % nxn cell array containing the search strings (non-word characters stripped) required to find donor/acceptor relationships in E2_section. (column=donor unit,row = acceptor unit)
        rel_lines_matrix;   %nxn cell array, same layout as search_string_matrix, except each cell contains a column cell array of the relevant lines of the D-->A relationship
    end
    
    methods
        function obj = NBO_E2_Class_V2(filename)    %CONSTRUCTOR. Extracts all info from the .log file
            %NBO_E2_CLASS_V2 Build the object from a Gaussian/NBO .log file.
            %   obj = NBO_E2_Class_V2(filename) reads the file, locates the
            %   molecular units, cuts out the second-order perturbation (E2)
            %   section, splits it into one block of lines per donor/acceptor
            %   unit pair, builds one A_D_NBO_E2_Class_V1 object per pair and
            %   reads the per-unit charges.
            %
            %   obj = NBO_E2_Class_V2() returns an unpopulated object (needed
            %   so that object arrays can be preallocated).
            %
            %   Errors (with identifiers NBO_E2_Class_V2:*) are raised when the
            %   file contains no "Molecular unit" lines or no complete E2
            %   section, instead of failing later with an undefined variable.
            if nargin>0 %Needs to do nothing if given no args, error msg would stop initiation of object arrays

                tic
                %****SECTION 1****%
                %Put file in nx1 cell array (column), each line = 1 row. Also
                %store the base filename as one of the object properties.
                %fileread opens and closes the file itself, so no separate
                %fopen/fclose is required.
                obj.full_filename = filename;
                full_file = strsplit(fileread(filename),'\n')';

                [~,tstring] = fileparts(filename);
                obj.base_fname = strrep(tstring,'.log','');

                %Logical column: true for every line of "lines" containing "txt"
                has_text = @(lines,txt) ~cellfun(@isempty,strfind(lines,txt));

                %****SECTION 2****%
                %Get line numbers for all lines which define chem. formula of a "unit"
                unit_lines = find(has_text(full_file,'Molecular unit'));
                if isempty(unit_lines)
                    error('NBO_E2_Class_V2:noUnits', ...
                        'No "Molecular unit" lines found in %s',filename);
                end

                %****SECTION 3****%
                %Extract chemical formula for each unit. Each entry is the
                %regexp 'match' output, i.e. a cell holding the bracketed formula.
                obj.unit_list = cell(numel(unit_lines),1);
                for i=1:numel(unit_lines)
                    obj.unit_list{i,1} = regexp(full_file{unit_lines(i),1},'\([a-za-zA-Z_0-9]*)','match');
                end

                %****SECTION 4****%
                %Cut file down to the section we need: from just below the
                %first "SECOND ORDER PERTURBATION" header to the first
                %following "NATURAL BOND ORBITALS" line (inclusive; that
                %trailing line acts as the terminator for the last D-->A block).
                upper_file = upper(full_file);
                e2_header = find(has_text(upper_file,'SECOND ORDER PERTURBATION'),1);
                if isempty(e2_header)
                    error('NBO_E2_Class_V2:noE2Section', ...
                        'No "SECOND ORDER PERTURBATION" section found in %s',filename);
                end
                nbo_hits = find(has_text(upper_file,'NATURAL BOND ORBITALS'));
                end_line = min(nbo_hits(nbo_hits > e2_header));
                if isempty(end_line)
                    error('NBO_E2_Class_V2:noE2End', ...
                        'No "NATURAL BOND ORBITALS" line found after the E2 section in %s',filename);
                end
                if size(obj.unit_list,1)>1
                    start_line = e2_header+6;
                else
                    start_line = e2_header+5; %no "intermolecular threshold" line for lone ions
                end
                contracted_file = full_file(start_line:end_line,1);
                obj.E2_section = contracted_file;

                %****SECTION 5****%
                %Generate search_string matrix (nxn cell, col number is donor
                %unit, row number is acceptor unit)
                numb_units = size(obj.unit_list,1);
                obj.search_string_matrix = cell(numb_units,numb_units);
                for donor=1:numb_units  %columns
                    for acceptor=1:numb_units %rows
                        if donor==acceptor
                            obj.search_string_matrix{acceptor,donor} = ['within unit  ' num2str(acceptor)];
                        else
                            obj.search_string_matrix{acceptor,donor} = ['from unit  ' num2str(donor) ' to unit  ' num2str(acceptor)];
                        end
                    end
                end

                %***Section 6****%
                %Generate rel_lines_matrix (see properties in class). NOT WORKING
                %FOR LONE IONS(SMD) currently.
                %'\W' strips every non-word character (whitespace AND
                %punctuation) from both sides, so header lines are compared on
                %letters/digits only.
                stripped_E2_section = regexprep(obj.E2_section,'\W','');
                obj.search_string_matrix = regexprep(obj.search_string_matrix,'\W','');
                obj.rel_lines_matrix = cell(size(obj.search_string_matrix,1), size(obj.search_string_matrix,2));
                numb_e2_lines = size(obj.E2_section,1);
                for col=1:size(obj.search_string_matrix,2) %Donor
                    for row=1:size(obj.search_string_matrix,1) %Acceptor
                        %First line whose stripped text equals the search string
                        start_line = find(strcmpi(obj.search_string_matrix{row,col},stripped_E2_section),1);
                        if isempty(start_line)
                            continue;   %No block for this pair; cell stays empty
                        end
                        %Default covers a header sitting on the very last line,
                        %so a stale end_line from an earlier pair is never reused
                        end_line = start_line;
                        for j=start_line+1:numb_e2_lines
                            curr_line = obj.E2_section{j,1};
                            %Block ends just before the next header, or just
                            %before the final (terminator) line of the section
                            if ~isempty(strfind(curr_line,'from')) || ~isempty(strfind(curr_line,'within')) || j == numb_e2_lines
                                end_line = j-1;
                                break;
                            end
                        end
                        obj.rel_lines_matrix{row,col} = obj.E2_section(start_line:end_line,1);
                    end
                end

                %****SECTION 7****
                %Generating all the Donor-->Acceptor objects
                DA_array(size(obj.rel_lines_matrix,1),size(obj.rel_lines_matrix,2)) = A_D_NBO_E2_Class_V1;
                for row=1:size(DA_array,1)
                    for col=1:size(DA_array,2)
                        DA_array(row,col) = A_D_NBO_E2_Class_V1(obj.unit_list{col,1},obj.unit_list{row,1},obj.rel_lines_matrix{row,col});
                    end
                end
                obj.DA_matrix = DA_array;
                toc

                %****SECTION 8****
                %Getting all the unit charges: the last whitespace-separated
                %token of every line matching the "Charge unit" pattern
                obj.frag_charges = NaN(size(obj.unit_list,1),1);
                search_regexp = 'Charge unit\s+\d+.+\d' ;
                frag_charge_counter = 1;
                for i=1:size(full_file,1)
                    if ~isempty(regexp(full_file{i,1},search_regexp,'once'))
                        tokens = strsplit(strtrim(full_file{i,1}));
                        %str2double never evaluates the text and yields NaN
                        %(rather than []) for a non-numeric token
                        obj.frag_charges(frag_charge_counter,1) = str2double(tokens{1,end});
                        frag_charge_counter = frag_charge_counter + 1;
                    end
                end

            end
        end
        
        %Note that for this function the diagonals of D_A matrix are the
        %combination of both intra- and inter-molecule D_A interactions for
        %those ones.
        
        %THE DA PART NEEDS CAREFUL CHECKING FOR OFF DIAGONAL ELEMENTS. (Use
        %2H2O case i guess)
        function Merge_Like_Fragments(obj)          %Recreates the unit_list/rel_lines_matrix/DA_matrix/fragment_charges so that fragments with same name are merged (e.g all water fragments merged into one)
            %MERGE_LIKE_FRAGMENTS Merge all units that share the same formula.
            %   Overwrites obj.unit_list, obj.frag_charges,
            %   obj.rel_lines_matrix and obj.DA_matrix in place:
            %     - unit_list keeps the first occurrence of each distinct entry
            %     - frag_charges of merged units are summed
            %     - rel_lines_matrix blocks of merged pairs are concatenated
            %       (the first block is kept whole; later blocks lose their
            %       header line and any "None above threshold" lines)
            %     - DA_matrix is rebuilt from the merged data
            %   Diagonal entries therefore combine intra- and inter-molecular
            %   interactions of the merged units.
            %   Does nothing when the object holds no units.

            if isempty(obj.unit_list)
                return;     %Unpopulated object: nothing to merge
            end

            %****SECTION 1****
            %Merging the unit_list (first occurrence of each distinct entry,
            %original order preserved). isequal is used because the entries
            %are themselves cells and may hold zero or several matches.
            new_unit_list = obj.unit_list(1,1);
            for i=2:size(obj.unit_list,1)
                already_listed = any(cellfun(@(x) isequal(obj.unit_list{i,1},x),new_unit_list));
                if ~already_listed
                    new_unit_list{end+1,1} = obj.unit_list{i,1}; %#ok<AGROW>
                end
            end
            numb_merged = size(new_unit_list,1);

            %****SECTION 2****
            %Creating index list. Entry i is a column vector with the
            %positions in obj.unit_list that hold new_unit_list{i}
            merged_indice_list = cell(numb_merged,1);
            for i=1:numb_merged
                is_member = false(size(obj.unit_list,1),1);
                for j=1:size(obj.unit_list,1)
                    is_member(j,1) = isequal(new_unit_list{i,1},obj.unit_list{j,1});
                end
                merged_indice_list{i,1} = find(is_member);
            end

            %***SECTION 3***
            %Merging the charge list: sum the charges of all merged units
            merged_charge_list = NaN(numb_merged,1);
            for i=1:numb_merged
                merged_charge_list(i,1) = sum(obj.frag_charges(merged_indice_list{i,1},1));
            end

            %***SECTION 4***
            %Merging the relevant lines matrix.
            %Note, column=donor while row=acceptor. Also note the diagonal
            %are both intramolecular and intermolecular combined.
            merged_rel_lines = cell(numb_merged,numb_merged);
            for row=1:numb_merged
                for col=1:numb_merged
                    donor_indices = merged_indice_list{col,1}';    %original columns to merge
                    acceptor_indices = merged_indice_list{row,1}'; %original rows to merge
                    curr_rel_lines = {};
                    is_first_pair = true;
                    %Acceptor is the outer loop and donor the inner one, so
                    %blocks are concatenated acceptor by acceptor
                    for acceptor=acceptor_indices
                        for donor=donor_indices
                            curr_block = obj.rel_lines_matrix{acceptor,donor};
                            if is_first_pair
                                %First block is kept whole (header included)
                                curr_rel_lines = curr_block;
                                is_first_pair = false;
                                continue;
                            end
                            if isempty(curr_block)
                                continue;   %Pair was never found in the E2 section
                            end
                            %Removing first line (from unit x-->y) and lines
                            %stating "None above threshold"
                            curr_block = curr_block(2:end,1);
                            keep_line = cellfun(@(x) isempty(strfind(x,'None above threshold')),curr_block);
                            curr_rel_lines = [curr_rel_lines;curr_block(keep_line)]; %#ok<AGROW>
                        end
                    end
                    %If the first block reported "None above threshold" but
                    %later blocks contributed real entries, drop that line.
                    %The size test comes first so short blocks are never indexed
                    %out of range.
                    if size(curr_rel_lines,1)>2 && ~isempty(strfind(curr_rel_lines{2,1},'None above threshold'))
                        curr_rel_lines(2)=[];
                    end
                    merged_rel_lines{row,col} = curr_rel_lines;
                end
            end

            %***SECTION 5***
            %Overwriting old variables (rel lines/charge list/unit list)
            obj.rel_lines_matrix = merged_rel_lines;
            obj.unit_list = new_unit_list;
            obj.frag_charges = merged_charge_list;

            %***SECTION 6***
            %Updating the DA matrix
            DA_array(size(obj.rel_lines_matrix,1),size(obj.rel_lines_matrix,2)) = A_D_NBO_E2_Class_V1;
            for row=1:size(DA_array,1)
                for col=1:size(DA_array,2)
                    DA_array(row,col) = A_D_NBO_E2_Class_V1(obj.unit_list{col,1},obj.unit_list{row,1},obj.rel_lines_matrix{row,col});
                end
            end
            obj.DA_matrix = DA_array;

        end
        
        %Returns two nxn matrices (n=number fragments) in same order as the
        %DA_matrix summarises the identity of the strongest D-->A hydrogen
        %bond between fragments
        %INPUT:
        %donor_frag = indice in unit_list of the donor frag
        %acceptor_frag = indice in unit_list of the acceptor frag
        %NOTE:Wont work for systems>1000 atoms im sure. for >100 atoms are
        %written HA0=H100. Hence i already need to sub out A for "10". Not
        %sure what notation is used for H1021 for example
        function [top_hbond_E2_vals, top_hbond_info_matrix] = extract_strongest_hbond_e2(obj,donor_frag,acceptor_frag)
            %EXTRACT_STRONGEST_HBOND_E2 Forward to the donor-->acceptor object.
            %   donor_frag    = index in unit_list of the donor fragment
            %   acceptor_frag = index in unit_list of the acceptor fragment
            %   Both outputs are whatever the extract_strongest_hbond_e2
            %   method of the selected DA_matrix element returns.
            %DA_matrix is indexed (row = acceptor, column = donor). The call
            %is terminated with a semicolon so results are not echoed.
            [top_hbond_E2_vals, top_hbond_info_matrix] = obj.DA_matrix(acceptor_frag,donor_frag).extract_strongest_hbond_e2();
        end
        
        %Populates the all_charges cell array. Note this re-imports the
        %entire file, hence takes about as long to run as the constructor
        function [] = get_charges(obj)
            %GET_CHARGES Populate obj.all_charges from the natural population table.
            %   Re-reads obj.full_filename, finds the first
            %   "Summary of Natural Population Analysis" section, then the
            %   table header containing "Atom No", and stores the first
            %   three whitespace-separated tokens of every table row (as
            %   text) until the "======" line that closes the table.
            %   If no table is found all_charges is a 1x3 cell of empties.

            %Import the file (fileread opens and closes it itself)
            full_file = strsplit(fileread(obj.full_filename),'\n')';

            %Search strings for getting the natural charges out
            charges_start_str = 'Summary of Natural Population Analysis'; %Section start, table a little further down
            charges_mid_str = 'Atom No';	%Header row of the table
            charges_end_str = '======================'; %End of table
            charges_matrix = cell(1,3);	%First three tokens of each table row
            charge_counter = 1;	%Tracks row of charges_matrix we're on
            at_charge_section = false;
            at_charge_table = false;

            for i=1:size(full_file,1)
                curr_line = full_file{i,1};

                if ~isempty(strfind(curr_line,charges_start_str))	%Start of relevant section
                    at_charge_section = true;
                end

                if at_charge_section && ~isempty(strfind(curr_line,charges_mid_str)) %Table header row
                    at_charge_table = true;
                    continue;   %The header row itself is not a data row
                end

                if ~at_charge_table || ~isempty(strfind(curr_line,'----'))
                    continue;   %Not in the table yet, or a dashed separator line
                end

                if ~isempty(strfind(curr_line,charges_end_str))
                    break;      %Closing "=====" line: table finished
                end

                row_tokens = strsplit(strtrim(curr_line));
                if numel(row_tokens) < 3
                    continue;   %Blank or malformed row: nothing to store
                end
                charges_matrix(charge_counter,1:3) = row_tokens(1:3);
                charge_counter = charge_counter + 1;
            end

            obj.all_charges = charges_matrix;

        end
        
    end
    
    
    
    
    
    methods(Static)
        % Run as: object_array = NBO_E2_Class_V2.Create_NBO_obj_array_folder(folder)
        %Takes the full path to a folder (the "folder" input variable) and
        %returns an object array containing one NBO_E2_Class_V2 object for
        %each .log file in the folder.
        %Also runs an error check to make sure atomic numbering is the same
        %for all the files, will throw error otherwise
        function [NBO_obj_array] = Create_NBO_obj_array_folder(folder)
            %CREATE_NBO_OBJ_ARRAY_FOLDER Build one object per .log file in a folder.
            %   NBO_obj_array = NBO_E2_Class_V2.Create_NBO_obj_array_folder(folder)
            %   returns an nx1 array of NBO_E2_Class_V2 objects, one per
            %   *.log file in "folder", each with all_charges populated via
            %   get_charges.
            %
            %   Raises an error when the folder holds no .log files, when
            %   the files do not all have the same number of all_charges
            %   rows, or when the first column of all_charges differs
            %   between the first file and any other file.

            %Creating the object array
            file_list = dir(fullfile(folder,'*.log')); %nx1 structure containing all relevant file names
            numb_files = size(file_list,1);
            if numb_files == 0
                error('NBO_E2_Class_V2:noLogFiles','No .log files found in folder %s',folder);
            end
            NBO_obj_array(numb_files,1) = NBO_E2_Class_V2;
            for file = 1:numb_files
                NBO_obj_array(file,1) = NBO_E2_Class_V2(fullfile(folder,file_list(file,1).name));
                NBO_obj_array(file,1).get_charges;
            end

            %Running error check on geometry
            %First check number of atoms is constant throughout:
            numb_atoms = NaN(numb_files,1);
            for i=1:numb_files
                numb_atoms(i,1) = size(NBO_obj_array(i,1).all_charges,1);
            end
            if any(numb_atoms ~= numb_atoms(1,1))
                %List the count for every file before failing, to help find the odd one out
                for i=1:numb_files
                    disp([num2str(size(NBO_obj_array(i,1).all_charges,1)) ' Atoms in '  NBO_obj_array(i,1).base_fname])
                end
                error(['Inconsistent Number of atoms in folder ' folder])
            end

            %Now check the first all_charges column is identical in every file
            all_geoms = cell(numb_atoms(1,1),numb_files);	%Each atom=1 row, each file = 1 col
            for j=1:numb_files
                all_geoms(:,j) = NBO_obj_array(j,1).all_charges(:,1);
            end

            for j=1:numb_files
                if ~all(strcmp(all_geoms(:,1),all_geoms(:,j)))
                    error(['Atomic Numbering is not constant within folder ' folder '.Inconsistencies in files  ' NBO_obj_array(1,1).base_fname  ' and ' NBO_obj_array(j,1).base_fname])
                end
            end
        end

    end
    
end

