classdef A_D_NBO_E2_Class_V1
    %A_D_NBO_E2_Class_V1 Holds all info on one specific donor->acceptor
    %relationship. Used with NBO_E2_Class_V*
    %   Constructor requires the formula for the donor unit (str, 1st
    %   argument), the acceptor unit (str, 2nd argument) and an n-by-1 cell
    %   array containing the relevant lines from the .log file (entries in
    %   obj.rel_lines_matrix). Row 1 of that cell array is skipped; every
    %   later row is parsed as one donor->acceptor E(2) entry unless row 2
    %   contains 'None above threshold'.
    %
    %   Calling the constructor with no arguments returns an object with all
    %   properties empty (needed so that object arrays can be initialised).

    %ISSUES:
    %Wondering if only non-Rydberg hydrogen acceptors should be treated as
    %H-bonds, rather than every interaction with H as the acceptor.

    %HISTORY:
    %Dec 2016: Code completed (rough date)
    %Jan 2017: hbonds_info now should get ALL hydrogen atoms, previously
    %          those with labels>100 would be missed as these had the format
    %          "H102=HA2"


    properties
        donor; %chemical formula for the donor unit
        acceptor; %chemical formula for the acceptor unit
        all_info;   %5 col cell array. col 1/2 = donor/acceptor MOs, col 3 = E2 value in kJ/mol, 4 = energy gap between donor/acceptor MOs in a.u, 5 = the f(i,j) value. [] if no interactions
        hbonds_info % Same as all_info but only contains interactions whereby the acceptor group is a hydrogen atom. cell(1,5) of empties if none were found
        vals;   %boolean. true if interactions above printing threshold detected, false if "None above threshold"
        e2_sum_all; %Sum (in kJ/mol) of all the donor->acceptor E(2) interactions
        e2_max_all; %Value (in kJ/mol) for strongest D-->A E(2) interaction
        hbond_sum_all; %Sum (in kJ/mol) of all the donor->acceptor E(2) interactions involving hydrogen atoms as acceptors
        hbond_max; %Value (in kJ/mol) for strongest D-->A E(2) interaction involving hydrogen atoms as acceptors

        %Design notes
                %'[space]H[.][\d]' in the acceptor column of all_info is
                %probably the best criterion for separating hydrogen bonding
                %from other interactions. Edge cases would be hafnium (Hf)
                %and He. Searching for 'H' may well do the same thing (so
                %put in the relevant error checks when creating the
                %H-bonding sum/strongest values).
                %OTOH '[space]H[\d space][\d]' could be used as the search
                %regexp to get the hydrogen bond contributions.
    end

    methods
        function obj = A_D_NBO_E2_Class_V1(donor,acceptor,file_output)
            %Build the object from the raw .log lines.
            %INPUT:
            %donor       = chemical formula of the donor unit (str)
            %acceptor    = chemical formula of the acceptor unit (str)
            %file_output = n-by-1 cell array of lines. Row 1 is skipped.
            %              For each later row: chars 1:22 are stored as the
            %              donor MO, chars 29:51 as the acceptor MO, and
            %              chars 55:end must hold three space-separated
            %              numbers (E(2), E(j)-E(i), F(i,j)).
            %ERRORS:
            %A_D_NBO_E2_Class_V1:badInput if file_output has fewer than 2 rows.

            %Needs to do nothing if given no args; an error here would stop
            %the initialisation of object arrays.
            if nargin == 0
                return;
            end

            %Factor applied to the E(2) column; per the original notes this
            %converts the printed values into kJ/mol.
            KCAL_TO_KJ = 4.1840;

            obj.donor = donor;
            obj.acceptor = acceptor;

            if size(file_output,1) < 2
                error('A_D_NBO_E2_Class_V1:badInput', ...
                    'file_output must contain at least 2 rows (got %d).', ...
                    size(file_output,1));
            end

            %****SECTION 2**** Extracting all info into the cell array
            if ~isempty(strfind(file_output{2,1},'None above threshold'))
                %Nothing to parse: leave every derived property empty.
                obj.vals = false;
                obj.all_info = [];
                return;
            end

            obj.vals = true;
            n_rows = size(file_output,1) - 1;
            obj.all_info = cell(n_rows,5);
            for i = 1:n_rows
                curr_line = file_output{i+1,1};
                obj.all_info{i,1} = curr_line(1:22);    %str containing info on donor MO
                obj.all_info{i,2} = curr_line(29:51);   %str containing info on acceptor MO

                %1st = E(2), 2nd = ej-ei, 3rd = fij. str2double is used
                %rather than str2num so the text is never eval'ed; a
                %malformed field becomes NaN and propagates into the sums
                %instead of being silently dropped.
                fields = strsplit(strtrim(curr_line(55:end)),' ');
                obj.all_info(i,3:5) = num2cell(str2double(fields));
                obj.all_info{i,3} = obj.all_info{i,3} * KCAL_TO_KJ;
            end

            %****SECTION 3****%
            %Sum of / strongest E2 value over every interaction
            e2_values = cell2mat(obj.all_info(:,3));
            obj.e2_sum_all = sum(e2_values);
            obj.e2_max_all = max(e2_values);

            %****SECTION 4****%
            %Creating hbonds_info. This is the same as all_info but is
            %filtered so that the acceptor group must contain a hydrogen
            %atom. Non-word characters are stripped and 'A' is expanded to
            %'10' (labels >= 100 are written e.g. "HA2" for H102) before
            %looking for 'H' directly followed by a digit.
            acceptor_labels = regexprep(obj.all_info(:,2),'\W','');
            acceptor_labels = regexprep(acceptor_labels,'A','10');
            is_hbond = ~cellfun(@isempty, regexp(acceptor_labels,'H\d','once'));
            if any(is_hbond)
                obj.hbonds_info = obj.all_info(is_hbond,:);
            else
                %Placeholder row of empties when no H-bond rows exist
                obj.hbonds_info = cell(1,5);
            end

            %****SECTION 5****%
            %Using hbonds_info to get the H-bond sum / strongest value
            hbond_values = cell2mat(obj.hbonds_info(:,3));
            obj.hbond_sum_all = sum(hbond_values);
            obj.hbond_max = max(hbond_values);
        end



        %Returns the total D-->A E2 for the strongest hydrogen bond in this
        %D->A object. If Dat = a donor atom and Aat = acceptor atom then this
        %looks for all D-->A relationships with Dat involved as donor and
        %Aat as acceptor, and sums these up. Does this for every atom
        %involved in acceptor and donor relationships, and returns the
        %info/E2 value for the top pair.

        %INPUT:
        %varargin = accepted but ignored. 'allatoms' was meant to lift the
        %restriction to hydrogen bonds (i.e. consider interactions between
        %atoms of any element) but is NOT SUPPORTED: it only makes sense for
        %hydrogen bonds since H has only 1 covalent bond. Otherwise, for an
        %O-H-C hbond, all O-->H1-C donation is picked up by both carbon and
        %hydrogen, and there will probably be some O-->H2-C interaction as
        %well, which makes C look like the top acceptor atom rather than H.
        %OUTPUT:
        %top_hbond_E2   = The total E2 value for the strongest hydrogen
        %                 bond. 0 if there are no H-bond interactions.
        %top_hbond_info = 'donoratom-acceptoratom' (e.g. 'O1-H3'), or [] if
        %                 there are no H-bond interactions.
        %NOTE: Probably won't work for systems >1000 atoms. For >100 atoms
        %labels are written HA0=H100, hence 'A' is substituted by "10". Not
        %sure what notation is used for H1021 for example.
        function [top_hbond_E2, top_hbond_info] = extract_strongest_hbond_e2(obj,varargin) %#ok<INUSD>

            %------>Step 1: Make a copy of the info matrix + get rid of all
            %spaces in the donor/acceptor columns (col 1/2). This makes it
            %easier to identify the exact atoms involved.
            hbond_only = true; %Hard-wired; false would allow the acceptor atom to be any element (unsupported)
            if hbond_only
                info_matrix = obj.hbonds_info;
            else
                info_matrix = obj.all_info;
            end

            %Edge cases: with no interactions at all info_matrix is [] (not
            %even a cell array); with interactions but no H-bonds it is a
            %row of empty cells. The || short-circuit keeps the brace index
            %from being evaluated on [].
            if isempty(info_matrix) || isempty(info_matrix{1,1})
                top_hbond_E2 = 0;
                top_hbond_info = [];
                return
            end

            %Get rid of both the whitespace and the "A" (placeholder for
            %"10") in the donor/acceptor strings. Easier to pull out atom
            %numbers.
            for col = 1:2
                cleaned = regexprep(info_matrix(:,col),' ','');
                info_matrix(:,col) = regexprep(cleaned,'A','10');
            end


            %----->STEP 2:
            %Extract the distinct donor atoms and acceptor atoms with their
            %numbers (e.g "H20" or "N18"), in order of first appearance.
            if hbond_only
                acceptor_pattern = 'H\d+';
            else
                acceptor_pattern = '[A-Za-z]+\d+';
            end
            all_donors = A_D_NBO_E2_Class_V1.collect_atom_labels( ...
                info_matrix(:,1),'[A-Za-z]+\d+');
            all_acceptors = A_D_NBO_E2_Class_V1.collect_atom_labels( ...
                info_matrix(:,2),acceptor_pattern);


            %----->STEP 3:
            %Create a matrix (n diff donor atoms * n diff acc atoms) holding
            %the totals for all donor atom-->acceptor atom E(2) values.
            %Entry (3,1) is the sum over interactions that mention
            %all_donors{3} as donor and all_acceptors{1} as acceptor.
            %
            %The per-label regexps are evaluated once per row up front
            %rather than once per (donor, acceptor, row) triple. The sums
            %are still accumulated in row order.
            donor_hits = A_D_NBO_E2_Class_V1.rows_mentioning( ...
                info_matrix(:,1),all_donors);
            acceptor_hits = A_D_NBO_E2_Class_V1.rows_mentioning( ...
                info_matrix(:,2),all_acceptors);

            all_da_atoms = zeros(size(all_donors,1),size(all_acceptors,1));
            for i = 1:size(info_matrix,1)
                d = donor_hits(i,:);
                a = acceptor_hits(i,:);
                all_da_atoms(d,a) = all_da_atoms(d,a) + info_matrix{i,3};
            end


            %----->STEP 4:
            %Pull the max E2 value out together with the atom pair it
            %belongs to (first maximum in column-major order on ties).
            [top_hbond_E2,loc] = max(all_da_atoms(:));
            [donor_indice,acceptor_indice] = ind2sub(size(all_da_atoms),loc);
            donor_atom = all_donors{donor_indice};
            acceptor_atom = all_acceptors{acceptor_indice};

            top_hbond_info = [donor_atom '-' acceptor_atom];
        end


    end

    methods (Static, Access = private)
        function labels = collect_atom_labels(strings,pattern)
            %Distinct regexp matches of PATTERN over the cell array STRINGS,
            %as a column cell array in order of first appearance. If nothing
            %matches, the result is a 1x1 cell holding [].
            labels = cell(1,1);
            n_labels = 0;
            for i = 1:numel(strings)
                found = regexp(strings{i},pattern,'match');
                for j = 1:numel(found)
                    if ~any(strcmp(found{j},labels))
                        n_labels = n_labels + 1;
                        labels{n_labels,1} = found{j};
                    end
                end
            end
        end

        function hits = rows_mentioning(strings,labels)
            %hits(i,k) is true when strings{i} contains labels{k} followed
            %by a non-digit or the end of the string (so that 'C2' does not
            %match inside 'C20').
            strings = strings(:);
            hits = false(numel(strings),numel(labels));
            for k = 1:numel(labels)
                label_regexp = [labels{k} '([^0-9]|$)'];
                hits(:,k) = ~cellfun(@isempty, ...
                    regexp(strings,label_regexp,'once'));
            end
        end
    end

end

