-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
- Loading branch information
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
from rdkit import Chem | ||
from rdkit.Chem import rdMolDescriptors | ||
|
||
def atom_in_multiple_small_rings(mol):
    """
    Report whether some atom is shared by at least two small rings.

    A ring is treated as small when it has four atoms or fewer, i.e. the
    3- and 4-membered rings reported by the molecule's ring info.

    Args:
        mol: RDKit molecule object.
    Returns:
        bool: True if at least one atom belongs to two or more small rings,
            False otherwise.
    """
    # Keep only the rings that are small enough to matter for this rule.
    small_rings = [
        members
        for members in mol.GetRingInfo().AtomRings()
        if len(members) <= 4
    ]

    # Tally, per atom index, how many small rings contain it.
    membership = {}
    for members in small_rings:
        for idx in members:
            membership[idx] = membership.get(idx, 0) + 1

    # One atom with a tally above one is enough to flag the molecule.
    return any(hits > 1 for hits in membership.values())
|
||
def double_bond_in_small_ring(mol):
    """
    Report whether a small ring (four bonds or fewer) holds a double bond.

    Args:
        mol: RDKit molecule object.
    Returns:
        bool: True as soon as one bond of a 3- or 4-membered ring has bond
            type DOUBLE, False if no such bond exists.
    """
    for bond_indices in mol.GetRingInfo().BondRings():
        # Larger rings are outside the scope of this rule.
        if len(bond_indices) > 4:
            continue

        # Look each ring bond up by index and compare its bond type.
        if any(
            mol.GetBondWithIdx(idx).GetBondType() == Chem.rdchem.BondType.DOUBLE
            for idx in bond_indices
        ):
            return True

    return False
|
||
def filter_molecules(mol):
    """
    Decide whether a single molecule passes both small-ring rules:
    1. No atom is part of two small (3- or 4-membered) rings.
    2. No double bond inside a small ring.

    Despite the plural name, this works on one molecule at a time and
    returns a pass/fail flag, so it can be used as a predicate when
    filtering a collection of molecules.

    Args:
        mol: RDKit molecule object.
    Returns:
        bool: True if the molecule satisfies both rules (keep it),
            False if it violates at least one (discard it).
    """
    # The second check only runs when the first one did not already fail.
    return not (
        atom_in_multiple_small_rings(mol) or double_bond_in_small_ring(mol)
    )
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import os | ||
import shutil | ||
|
||
def get_unique_filename(dst_folder, filename):
    """
    Pick a filename that does not yet exist inside the destination folder.

    The original name is returned unchanged when it is free. Otherwise a
    numeric suffix is inserted before the extension (name_1.ext, name_2.ext,
    ...) and the first free candidate is returned.

    Args:
        dst_folder (str): The destination folder.
        filename (str): The original filename.
    Returns:
        str: A filename that is not present in dst_folder.
    """
    stem, suffix = os.path.splitext(filename)
    candidate = filename
    attempt = 0

    # Keep bumping the suffix until the candidate path is unused.
    while os.path.exists(os.path.join(dst_folder, candidate)):
        attempt += 1
        candidate = f"{stem}_{attempt}{suffix}"

    return candidate
|
||
def gather_shifted_sdfs(src_root, dst_folder):
    """
    Gather all *_shifted.sdf files from subfolders and copy them to a single destination folder.
    Renames files with a counter if they already exist.

    The destination folder is created if needed and is itself excluded from
    the search, so it may safely live inside src_root: files that were
    already gathered are never picked up and copied again. Directories and
    files are visited in sorted order so that the counter suffixes given to
    clashing names are reproducible between runs.

    Args:
        src_root (str): The root folder containing subfolders.
        dst_folder (str): The folder where all the *_shifted.sdf files will be copied.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dst_folder, exist_ok=True)
    dst_abs = os.path.abspath(dst_folder)

    # Walk through all directories and subdirectories
    for root, dirs, files in os.walk(src_root):
        # Prune the destination from the walk (in-place edit of dirs is how
        # os.walk is told not to descend) and fix the traversal order.
        dirs[:] = sorted(
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != dst_abs
        )

        # If the walk starts at the destination itself, its own files must
        # not be copied onto themselves under a new name.
        if os.path.abspath(root) == dst_abs:
            continue

        for file in sorted(files):
            if not file.endswith('_shifted.sdf'):
                continue

            # Get the unique filename to avoid overwriting
            unique_filename = get_unique_filename(dst_folder, file)

            # Build the source and destination paths once and reuse them
            full_file_path = os.path.join(root, file)
            dst_path = os.path.join(dst_folder, unique_filename)

            # Copy the file to the destination folder with the unique filename
            shutil.copy(full_file_path, dst_path)
            print(f"Copied: {full_file_path} to {dst_path}")
|
||
if __name__ == "__main__":
    # Script entry point: collect every *_shifted.sdf below the sampling
    # output folder into one "combined_shifted_sdf" folder inside it.
    gather_shifted_sdfs(
        # Change this to your parent folder
        src_root='/home/yang2531/Documents/Bo_toolbox/PatWalters/Benchmarking_gene_model/Molsnapper/sample_MolDiff_20241002_214910_clash_rate_0.1_SDF',
        # Change this to your destination folder
        dst_folder='/home/yang2531/Documents/Bo_toolbox/PatWalters/Benchmarking_gene_model/Molsnapper/sample_MolDiff_20241002_214910_clash_rate_0.1_SDF/combined_shifted_sdf',
    )