From efd1f08e74332c01d9e6f09a48029203aa2d5418 Mon Sep 17 00:00:00 2001 From: TimotheMelin Date: Thu, 19 Sep 2024 15:46:10 +0200 Subject: [PATCH 01/10] print test in big frag multipole --- PoltypeModules/lFragmenterForDMA.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/PoltypeModules/lFragmenterForDMA.py b/PoltypeModules/lFragmenterForDMA.py index d69e1e10..737950ca 100644 --- a/PoltypeModules/lFragmenterForDMA.py +++ b/PoltypeModules/lFragmenterForDMA.py @@ -55,7 +55,7 @@ def saveFragment(mol, atomsToKeep, fragname, bondsToDelete=[]): def growFragment(atomidx, sdffile): mol = Chem.MolFromMolFile(sdffile,removeHs=False) - + print(mol) # 1. Atom in three rings # 2. Atom in two rings # 3. Atom in one ring @@ -267,5 +267,7 @@ def specialCaseSixFusedFiveMemRing(fragmol): global sdffile sdffile = sys.argv[1] - atom_id = int(sys.argv[2]) + atom_id = int(sys.argv[2]) + print(sdffile) + print(atom_id) growFragment(atom_id, sdffile) From 67ce456871a9e1d5c845761324c93a428e053d1a Mon Sep 17 00:00:00 2001 From: TimotheMelin Date: Thu, 19 Sep 2024 16:05:08 +0200 Subject: [PATCH 02/10] Atoms in Triple bond option --- PoltypeModules/lFragmenterForDMA.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/PoltypeModules/lFragmenterForDMA.py b/PoltypeModules/lFragmenterForDMA.py index 737950ca..ce2b7a31 100644 --- a/PoltypeModules/lFragmenterForDMA.py +++ b/PoltypeModules/lFragmenterForDMA.py @@ -103,6 +103,16 @@ def growFragment(atomidx, sdffile): if len(match) == 1: atomsInSixMemRing.append(match[0]) + atomsInTripleBond = [] + pattern = Chem.MolFromSmarts('[*#*]') + matches = mol.GetSubstructMatches(pattern) + for match in matches: + if len(match) == 1 + atomsInTripleBond.append(match[0]) + + print(atomsInTripleBond) + + # Case 1: the atom is in three rings if (atomidx - 1) in atomsInThreeRing: atomsToKeep = [] From 2a57c0b40946fb02625fa0bd89ce9656b73b440f Mon Sep 17 00:00:00 2001 From: TimotheMelin Date: Thu, 19 Sep 2024 16:38:45 +0200 Subject: [PATCH 03/10] Fix identation and SMART pattern --- PoltypeModules/lFragmenterForDMA.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PoltypeModules/lFragmenterForDMA.py b/PoltypeModules/lFragmenterForDMA.py index ce2b7a31..be21940d 100644 --- a/PoltypeModules/lFragmenterForDMA.py +++ b/PoltypeModules/lFragmenterForDMA.py @@ -104,10 +104,10 @@ def growFragment(atomidx, sdffile): atomsInSixMemRing.append(match[0]) atomsInTripleBond = [] - pattern = Chem.MolFromSmarts('[*#*]') + pattern = Chem.MolFromSmarts('$([*#*])') matches = mol.GetSubstructMatches(pattern) for match in matches: - if len(match) == 1 + if len(match) == 1: atomsInTripleBond.append(match[0]) print(atomsInTripleBond) From c7224946ef7769991155f7cb2d0b9c37add6c2f4 Mon Sep 17 00:00:00 2001 From: TimotheMelin Date: Fri, 20 Sep 2024 14:29:20 +0200 Subject: [PATCH 04/10] Start to chop off fragment --- PoltypeModules/lFragmenterForDMA.py | 83 ++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/PoltypeModules/lFragmenterForDMA.py b/PoltypeModules/lFragmenterForDMA.py index be21940d..3cbe17d1 100644 --- a/PoltypeModules/lFragmenterForDMA.py +++ b/PoltypeModules/lFragmenterForDMA.py @@ -104,7 +104,7 @@ def growFragment(atomidx, sdffile): atomsInSixMemRing.append(match[0]) atomsInTripleBond = [] - pattern = Chem.MolFromSmarts('$([*#*])') + pattern = Chem.MolFromSmarts('[$(*#*)]') matches = mol.GetSubstructMatches(pattern) for match in matches: if len(match) == 1: @@ -217,9 +217,88 @@ def growFragment(atomidx, sdffile): shutil.move(f"Frag_Atom{atomidx:03d}_0.mol", f"Frag_Atom{atomidx:03d}.mol") else: os.system(f"rm -f Frag_Atom{atomidx:03d}_0.mol") - + + # Case when atom is part of triple bond + if (atomidx -1) in atomsInTripleBond: + print('hey') + atomsToAdd = [] + atomsToKeep = [] + t1 = mol.GetAtomWithIdx(atomidx-1) + for neig in t1.GetNeighbors(): + neig_idx = neig.GetIdx() + atomsToAdd.append(neig_idx) + atomsToKeep.append(neig_idx) + + + for neig in atomsToAdd: + if neig in atomsInRing: + connectedAtoms = findConnectedAtoms(mol, atomsToAdd, atomsInRing) + print(connectedAtoms) + atomsToKeep += connectedAtoms + else: + + connectedAtoms = findConnectedAtomsGeneral(mol, atomsToAdd, atomidx, neig) + print(connectedAtoms) + print(atomsToKeep) + atomsToKeep += connectedAtoms + saveFragment(mol, atomsToKeep, f"Frag_Atom{atomidx:03d}.mol") + + if os.path.isfile(f"Frag_Atom{atomidx:03d}_0.mol"): + testmol = Chem.MolFromMolFile(f"Frag_Atom{atomidx:03d}.mol",removeHs=False) + if len(testmol.GetAtoms()) == len(mol.GetAtoms()): + shutil.move(f"Frag_Atom{atomidx:03d}_0.mol", f"Frag_Atom{atomidx:03d}.mol") + else: + os.system(f"rm -f Frag_Atom{atomidx:03d}_0.mol") + + + + return + +def findConnectedAtomsGeneral(mol, atomsToadd, atomidx, neig): + + print('neigh to check for', neig) + Break = False + prev_atm_idx = atomidx - 1 + current_atm_idx = neig + + Co_atoms = [] + while not Break: + print('hello') + Neig = get_neigh_atoms(mol, current_atm_idx, prev_atm_idx) + print('Dict',Neig) + + if len(Neig) == 1: + Break = False + print('Check dict') + print(Neig.keys) + prev_atm_idx = current_atm_idx + current_atm_idx = list(Neig.keys())[0] + else: + Break = True + + Co_atoms += list(Neig.keys()) + + + return Co_atoms + + +def get_neigh_atoms(mol, Current_Idx, Prev_Idx): + print('CUrrent Idx',Current_Idx) + print('Prev Idx',Prev_Idx) + C_atm = mol.GetAtomWithIdx(Current_Idx) + + Neighboor = {} + + for neib in C_atm.GetNeighbors(): + if neib.GetIdx() != Prev_Idx: + IDx = mol.GetAtomWithIdx(neib.GetIdx()) + Neighboor[neib.GetIdx()] = IDx.GetAtomicNum() + + return Neighboor + + # helper function to determine where to cut def findConnectedAtoms(rdkitmol, atomList, atomsInRing): # C-C single bond From ad6ee387659b123c0e8bdcba30241873f5e725b8 Mon Sep 17 00:00:00 2001 From: TimotheMelin Date: Mon, 23 Sep 2024 11:02:55 +0200 Subject: [PATCH 05/10] Finish up first draft of script to fragment triple bond molecule --- PoltypeModules/lFragmenterForDMA.py | 117 ++++++++++++++++++++++------ 1 file changed, 94 insertions(+), 23 deletions(-) diff --git a/PoltypeModules/lFragmenterForDMA.py b/PoltypeModules/lFragmenterForDMA.py index 3cbe17d1..a5c61d5d 100644 --- a/PoltypeModules/lFragmenterForDMA.py +++ b/PoltypeModules/lFragmenterForDMA.py @@ -220,29 +220,29 @@ def growFragment(atomidx, sdffile): # Case when atom is part of triple bond if (atomidx -1) in atomsInTripleBond: - print('hey') atomsToAdd = [] - atomsToKeep = [] + atomsToKeep = [atomidx-1] t1 = mol.GetAtomWithIdx(atomidx-1) for neig in t1.GetNeighbors(): neig_idx = neig.GetIdx() atomsToAdd.append(neig_idx) atomsToKeep.append(neig_idx) - + # Loop through the atoms to add and check: + # if the atom is in an aromatic ring + # if not, then try to find coonected atoms with specific rules for neig in atomsToAdd: + if neig in atomsInRing: connectedAtoms = findConnectedAtoms(mol, atomsToAdd, atomsInRing) - print(connectedAtoms) atomsToKeep += connectedAtoms - else: + else: connectedAtoms = findConnectedAtomsGeneral(mol, atomsToAdd, atomidx, neig) - print(connectedAtoms) - print(atomsToKeep) atomsToKeep += connectedAtoms - saveFragment(mol, atomsToKeep, f"Frag_Atom{atomidx:03d}.mol") + + saveFragment(mol, atomsToKeep, f"Frag_Atom{atomidx:03d}.mol") if os.path.isfile(f"Frag_Atom{atomidx:03d}_0.mol"): testmol = Chem.MolFromMolFile(f"Frag_Atom{atomidx:03d}.mol",removeHs=False) if len(testmol.GetAtoms()) == len(mol.GetAtoms()): @@ -250,47 +250,118 @@ def growFragment(atomidx, sdffile): else: os.system(f"rm -f Frag_Atom{atomidx:03d}_0.mol") - + return - return +def find_patern(mol,atm_to_check,pat_to_check): + + """ + This function find the atoms present in a specific pattern + and check if a given atom is part of the atom present in the + pattern. + + Inputs: + - mol: rdkit mol object + - atm_to_check: rdkit atom id to check + - pat_to_check: SMART string to check as pattern + + Outputs: + - is_present: if atom_to_check exist or not in the SMART pattern (bool) + - Search_pattern: list of rdkit atom ID present in the pattern + """ + + + Search_pattern = [] + pattern = Chem.MolFromSmarts(pat_to_check) + matches = mol.GetSubstructMatches(pattern) + for match in matches: + if len(match) == 1: + Search_pattern.append(match[0]) + + if atm_to_check in Search_pattern: + is_present = True + else: + is_present = False + + return is_present, Search_pattern def findConnectedAtomsGeneral(mol, atomsToadd, atomidx, neig): - print('neigh to check for', neig) + """ + This function attempt to find atoms connected to triple bond. + By default, it will cut just after an atom that does not have triple bond: + if the atom is part of triple bond, then stop after max two iterations. + + TODO: + - If atoms neig as more than 1 neighboor, then need to add more options + + Inputs: + - mol: RDKIT mol object + - atomsToadd: list of atom neighboor to main atom to check + - atomidx: main atom to check ID + - neig: neighboor atom of the main atom to check + + Outputs: + - Co_atoms: list of connected atoms + """ + Break = False prev_atm_idx = atomidx - 1 current_atm_idx = neig - Co_atoms = [] + cc = 0 + + # While loop until the check for connected atoms stops while not Break: - print('hello') + + # Grab the neighboor atoms of neig Neig = get_neigh_atoms(mol, current_atm_idx, prev_atm_idx) - print('Dict',Neig) - if len(Neig) == 1: - Break = False - print('Check dict') - print(Neig.keys) + if len(Neig) == 1 and list(Neig.keys())[0] not in Co_atoms: prev_atm_idx = current_atm_idx current_atm_idx = list(Neig.keys())[0] + + if find_patern(mol,current_atm_idx,'[*;R]')[0]: + atomsToAdd = list(Neig.keys()) + connectedAtoms = findConnectedAtoms(mol, atomsToAdd, find_patern(mol,current_atm_idx,'[*;R]')[1]) + atomsToAdd += connectedAtoms + Co_atoms += atomsToAdd + Break = True + else: + Co_atoms += list(Neig.keys()) + Break = False + else: + Co_atoms += list(Neig.keys()) Break = True - Co_atoms += list(Neig.keys()) - + cc += 1 + if cc == 2: + Break = True return Co_atoms def get_neigh_atoms(mol, Current_Idx, Prev_Idx): - print('CUrrent Idx',Current_Idx) - print('Prev Idx',Prev_Idx) + + """ + This function create a dictionnary such as: + {Atom ID: Atomic Num} + Make sure that none of previous atom are selected + + Inputs: + - mol: RDKIT mol object + - Current_Idx: Atom id to check + - Prev_Idx: Atom id of the previous atom (to skip) + + Outputs: + - Neighboor: dictionnary with the neighboored atoms + + """ C_atm = mol.GetAtomWithIdx(Current_Idx) Neighboor = {} - for neib in C_atm.GetNeighbors(): if neib.GetIdx() != Prev_Idx: IDx = mol.GetAtomWithIdx(neib.GetIdx()) From 083ad342ba4c892409a9510abfaa19adacfbaf50 Mon Sep 17 00:00:00 2001 From: TimotheMelin Date: Thu, 26 Sep 2024 15:34:51 +0200 Subject: [PATCH 06/10] Cut triple bond at the neareast nieghbor --- PoltypeModules/lFragmenterForDMA.py | 34 +++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/PoltypeModules/lFragmenterForDMA.py b/PoltypeModules/lFragmenterForDMA.py index a5c61d5d..6e4cbc0a 100644 --- a/PoltypeModules/lFragmenterForDMA.py +++ b/PoltypeModules/lFragmenterForDMA.py @@ -110,7 +110,7 @@ def growFragment(atomidx, sdffile): if len(match) == 1: atomsInTripleBond.append(match[0]) - print(atomsInTripleBond) + print('Atoms in triple bond',atomsInTripleBond) # Case 1: the atom is in three rings @@ -227,21 +227,27 @@ def growFragment(atomidx, sdffile): neig_idx = neig.GetIdx() atomsToAdd.append(neig_idx) atomsToKeep.append(neig_idx) - + print('Atoms to add ',atomsToAdd) + print('Atoms in ring',atomsInRing) # Loop through the atoms to add and check: # if the atom is in an aromatic ring # if not, then try to find coonected atoms with specific rules for neig in atomsToAdd: if neig in atomsInRing: + print('Found atom in ring as first neighboor') connectedAtoms = findConnectedAtoms(mol, atomsToAdd, atomsInRing) atomsToKeep += connectedAtoms + elif neig in atomsInTripleBond: + print(f'Neig {neig} is in atomsinTripleBond') + atomsToKeep += [neig] else: + print(f'Neig {neig} is neither in atomsinTripleBOnd nor in atomsinRing') connectedAtoms = findConnectedAtomsGeneral(mol, atomsToAdd, atomidx, neig) atomsToKeep += connectedAtoms - + print('Final atoms to keep',atomsToKeep) saveFragment(mol, atomsToKeep, f"Frag_Atom{atomidx:03d}.mol") if os.path.isfile(f"Frag_Atom{atomidx:03d}_0.mol"): testmol = Chem.MolFromMolFile(f"Frag_Atom{atomidx:03d}.mol",removeHs=False) @@ -317,27 +323,27 @@ def findConnectedAtomsGeneral(mol, atomsToadd, atomidx, neig): # Grab the neighboor atoms of neig Neig = get_neigh_atoms(mol, current_atm_idx, prev_atm_idx) - + print(f'Neig of {current_atm_idx}: {Neig}') if len(Neig) == 1 and list(Neig.keys())[0] not in Co_atoms: prev_atm_idx = current_atm_idx current_atm_idx = list(Neig.keys())[0] - if find_patern(mol,current_atm_idx,'[*;R]')[0]: - atomsToAdd = list(Neig.keys()) - connectedAtoms = findConnectedAtoms(mol, atomsToAdd, find_patern(mol,current_atm_idx,'[*;R]')[1]) - atomsToAdd += connectedAtoms - Co_atoms += atomsToAdd - Break = True - else: - Co_atoms += list(Neig.keys()) - Break = False + #if find_patern(mol,current_atm_idx,'[*;R]')[0]: + # atomsToAdd = list(Neig.keys()) + # connectedAtoms = findConnectedAtoms(mol, atomsToAdd, find_patern(mol,current_atm_idx,'[*;R]')[1]) + # atomsToAdd += connectedAtoms + # Co_atoms += atomsToAdd + # Break = True + #else: + Co_atoms += list(Neig.keys()) + Break = False else: Co_atoms += list(Neig.keys()) Break = True cc += 1 - if cc == 2: + if cc == 1: Break = True return Co_atoms From 2df9a11093d9cd3c0638a07a5935966ca2d21bfd Mon Sep 17 00:00:00 2001 From: TimotheMelin Date: Tue, 1 Oct 2024 11:50:08 +0200 Subject: [PATCH 07/10] Add function to check pattern and set new_dma = true (using dma4) if triple bond is found --- PoltypeModules/lmodifytinkerkey.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/PoltypeModules/lmodifytinkerkey.py b/PoltypeModules/lmodifytinkerkey.py index f4b074a6..a301e1c2 100644 --- a/PoltypeModules/lmodifytinkerkey.py +++ b/PoltypeModules/lmodifytinkerkey.py @@ -241,6 +241,8 @@ def modkey2_fragmpole(poltype): pt.write(f"numproc={poltype.numproc}\n") pt.write(f"maxmem={poltype.maxmem}\n") pt.write(f"maxdisk={poltype.maxdisk}\n") + if check_if_pattern_exist(f,'[$(*#*)]'): + pt.write(f"new_dma=True\n") # Run Poltype Job os.chdir(os.path.join(homedir, 'Fragments_DMA', fname)) @@ -269,6 +271,23 @@ def modkey2_fragmpole(poltype): shutil.move('tmpkeyforfrag.key', key2) return + +def check_if_pattern_exist(sdffile,pattern_to_check): + + mol = Chem.MolFromMolFile(sdffile,removeHs=False) + + atomsInPattern = [] + pattern = Chem.MolFromSmarts(pattern_to_check) + matches = mol.GetSubstructMatches(pattern) + for match in matches: + if len(match) == 1: + atomsInPattern.append(match[0]) + + if len(atomsInPattern) > 0: + return True + else: + return False + # helper function to transfer multipole back to parent def transferMultipoleToParent(poltype, frag_sdf, frag_xyz, frag_key, parent_sdf, parent_xyz, parent_key): From 7f3b3fb90d4674b4fce7530db592ec02ec39dee4 Mon Sep 17 00:00:00 2001 From: TimotheMelin Date: Tue, 1 Oct 2024 16:29:50 +0200 Subject: [PATCH 08/10] Add right path for sdf file --- PoltypeModules/lmodifytinkerkey.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PoltypeModules/lmodifytinkerkey.py b/PoltypeModules/lmodifytinkerkey.py index a301e1c2..fc29e6d9 100644 --- a/PoltypeModules/lmodifytinkerkey.py +++ b/PoltypeModules/lmodifytinkerkey.py @@ -241,7 +241,7 @@ def modkey2_fragmpole(poltype): pt.write(f"numproc={poltype.numproc}\n") pt.write(f"maxmem={poltype.maxmem}\n") pt.write(f"maxdisk={poltype.maxdisk}\n") - if check_if_pattern_exist(f,'[$(*#*)]'): + if check_if_pattern_exist(f'./{fname}/{f},'[$(*#*)]'): pt.write(f"new_dma=True\n") # Run Poltype Job From c4f2f315643d6870cb58fcd4c3ca69f67423cdc1 Mon Sep 17 00:00:00 2001 From: TimotheMelin Date: Wed, 2 Oct 2024 11:30:01 +0200 Subject: [PATCH 09/10] fix typo --- PoltypeModules/lmodifytinkerkey.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PoltypeModules/lmodifytinkerkey.py b/PoltypeModules/lmodifytinkerkey.py index fc29e6d9..056e50aa 100644 --- a/PoltypeModules/lmodifytinkerkey.py +++ b/PoltypeModules/lmodifytinkerkey.py @@ -241,7 +241,7 @@ def modkey2_fragmpole(poltype): pt.write(f"numproc={poltype.numproc}\n") pt.write(f"maxmem={poltype.maxmem}\n") pt.write(f"maxdisk={poltype.maxdisk}\n") - if check_if_pattern_exist(f'./{fname}/{f},'[$(*#*)]'): + if check_if_pattern_exist(f'./{fname}/{f}','[$(*#*)]'): pt.write(f"new_dma=True\n") # Run Poltype Job From 8c5537ffc2d48fe8ec82947663cbf5caca3459f8 Mon Sep 17 00:00:00 2001 From: TimotheMelin Date: Wed, 2 Oct 2024 14:51:28 +0200 Subject: [PATCH 10/10] Fix typo --- PoltypeModules/lmodifytinkerkey.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PoltypeModules/lmodifytinkerkey.py b/PoltypeModules/lmodifytinkerkey.py index 056e50aa..7180cffb 100644 --- a/PoltypeModules/lmodifytinkerkey.py +++ b/PoltypeModules/lmodifytinkerkey.py @@ -242,7 +242,7 @@ def modkey2_fragmpole(poltype): pt.write(f"maxmem={poltype.maxmem}\n") pt.write(f"maxdisk={poltype.maxdisk}\n") if check_if_pattern_exist(f'./{fname}/{f}','[$(*#*)]'): - pt.write(f"new_dma=True\n") + pt.write(f"new_gdma=True\n") # Run Poltype Job os.chdir(os.path.join(homedir, 'Fragments_DMA', fname))