Skip to content

Commit

Permalink
changed behaviour when deleting bond: connected atoms with no further…
Browse files Browse the repository at this point in the history
… neighbours are not automatically deleted anymore
  • Loading branch information
thsa committed Jul 12, 2024
1 parent e47bcbc commit 4ff0703
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 117 deletions.
21 changes: 4 additions & 17 deletions src/main/java/com/actelion/research/chem/Molecule.java
Original file line number Diff line number Diff line change
Expand Up @@ -1675,24 +1675,11 @@ public void swapBonds(int bond1, int bond2) {
* @param atom
*/
public void deleteAtom(int atom) {
for (int bnd=0; bnd<mAllBonds; bnd++) {
for (int i=0; i<2; i++) {
if (mBondAtom[i][bnd] == atom) {
for (int bnd=0; bnd<mAllBonds; bnd++)
for (int i=0; i<2; i++)
if (mBondAtom[i][bnd] == atom)
mBondType[bnd] = cBondTypeDeleted; // mark for delete
int bonds = 0;
for (int j=0; j<mAllBonds; j++) {
if (j == bnd) continue;
if ((mBondAtom[0][j] == mBondAtom[1-i][bnd])
|| (mBondAtom[1][j] == mBondAtom[1-i][bnd]))
bonds++;
}
if (bonds == 0) {
removeMappingNo(mAtomMapNo[mBondAtom[1-i][bnd]]);
mAtomicNo[mBondAtom[1-i][bnd]] = -1;
} // mark for delete
}
}
}

removeMappingNo(mAtomMapNo[atom]);
mAtomicNo[atom] = -1; // mark for delete
if (mAtomList != null)
Expand Down
113 changes: 68 additions & 45 deletions src/main/java/com/actelion/research/chem/SmilesAtomParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,19 +92,6 @@ else if ((smiles[position-1] == 'A' || smiles[position-1] == 'a') && allowSmarts
return position;
}

/**
 * Scans forward from {@code position} to the end of the current atom option inside a
 * bracket expression. An option ends at the first ',' or ']' that is not enclosed in a
 * nested '[' ... ']' pair opened after the scan started.
 * <p>
 * The scan never reads beyond the end of {@code smiles}: if the array ends before a
 * delimiter is found (including the case of an unclosed nested '['), the scan stops
 * at {@code smiles.length}.
 *
 * @param smiles   bytes of the SMILES/SMARTS string being parsed
 * @param position index at which scanning starts
 * @return index just after the delimiting ',' or ']'; {@code smiles.length+1} if the
 *         array ended before a delimiter was found
 */
private int advanceToNextOption(byte[] smiles, int position) {
	int level = 0;	// nesting depth of '[' brackets opened since the scan started

	// The bounds check must guard the whole condition: while inside a nested bracket
	// (level != 0) we keep scanning, but only as long as there are bytes left to read.
	while (position < smiles.length
		&& (level != 0 || (smiles[position] != ',' && smiles[position] != ']'))) {
		if (smiles[position] == '[')
			level++;
		else if (smiles[position] == ']')
			level--;
		position++;
	}

	return position+1;
}

private int advanceJustAfterClosingBracket(byte[] smiles, int position) throws Exception {
int level = 0;
while (position < smiles.length && (smiles[position] != ']' || level != 0)) {
Expand All @@ -124,39 +111,23 @@ else if (smiles[position] == ']')

/**
 * Parses the atom alternative with the given index from a bracket expression that may
 * contain multiple ','-separated atom alternatives.
 * <p>
 * The five-argument overload is invoked {@code option+1} times in total, with SMARTS
 * and atom options allowed. Each invocation starts at the position returned by the
 * previous one, so the first {@code option} invocations merely step over the preceding
 * alternatives.
 *
 * @param smiles   bytes of the SMILES/SMARTS string being parsed
 * @param position start position handed to the first invocation
 * @param endIndex passed through unchanged to every invocation
 * @param option   index for ','-separated atom alternatives as non-negated recursive SMARTS;
 *                 values of zero or less select the first alternative
 * @return position returned by the final invocation of the five-argument overload
 * @throws Exception if any invocation of the five-argument overload throws
 */
protected int parseAtomInsideBrackets(byte[] smiles, int position, int endIndex, int option) throws Exception {
	int current = position;

	// step over all alternatives preceding the requested one
	for (int skipped=0; skipped<option; skipped++)
		current = parseAtomInsideBrackets(smiles, current, endIndex, true, true);

	return parseAtomInsideBrackets(smiles, current, endIndex, true, true);
}

/**
* @param smiles
* @param position
* @param position points to second character of atom description, e.g. of the atom label
* @param endIndex
* @param allowSmarts
* @return
* @return position of first character after closing ']' (or delimiting ',' if enumerating SMARTS)
* @throws Exception
*/
protected int parseAtomInsideBrackets(byte[] smiles, int position, int endIndex, boolean allowSmarts, boolean allowOptions) throws Exception {
protected int parseAtomInsideBrackets(byte[] smiles, int position, int endIndex, boolean allowSmarts, boolean allowAtomOptions) throws Exception {
if (smiles[position-1] == '$') { // recursive SMARTS
recursiveSmartsList = new ArrayList<>();
position += parseRecursiveGroup(smiles, position-1, recursiveSmartsList) - 1;

if (smiles[position++] != ']') {
if (!allowOptions)
if (!allowAtomOptions)
throw new Exception("SmilesParser: A positive recursive SMARTS followed by another one or by atom query features is not supported. Position:" + (position - 1));

position = advanceJustAfterClosingBracket(smiles, position);
if ((mMode & SmilesParser.MODE_ENUMERATE_SMARTS) == 0)
position = advanceJustAfterClosingBracket(smiles, position);
}

return position;
Expand Down Expand Up @@ -319,7 +290,9 @@ else if (!mayBeAromatic)
continue;
}

if (smiles[position] == 'D') { // non-H-neighbours
if (smiles[position] == 'D' // number of explicit neighbours (incl. explicit H)
|| smiles[position] == 'd') { // (RDKit extension) number of non-H-neighbours
// we translate both to the number of non-H neighbours (for 'D' we assume no explicit H to be present)
position++;
position += range.parse(position, 1, 1);
long flags = 0;
Expand Down Expand Up @@ -347,7 +320,7 @@ else if ((atomQueryFeatures & Molecule.cAtomQFNeighbours) != 0)
continue;
}

if (smiles[position] == 'z' && mAllowCactvs) { // electro-negative neighbour count (CACTVS extension)
if (smiles[position] == 'z' && mAllowCactvs) { // electro-negative neighbour count (CACTVS,RDKit extension)
position++;
position += range.parse(position, 1, 4);
long flags = 0;
Expand Down Expand Up @@ -490,11 +463,33 @@ else if (!range.isRange())
continue;
}

if (smiles[position] == '^') { // RDKit hybridisation is translated into number of pi-electrons
position++;

int hybridization = smiles[position++] - '0';

if (hybridization < 1 || hybridization > 3)
throw new Exception("SmilesParser: Unsupported hybridization. Position:"+position);

long piElectrons = (hybridization == 1) ? Molecule.cAtomQFNot2PiElectrons
: (hybridization == 2) ? Molecule.cAtomQFNot1PiElectron : Molecule.cAtomQFNot0PiElectrons;

if (!isNot)
piElectrons = Molecule.cAtomQFPiElectrons & ~piElectrons;

atomQueryFeatures |= piElectrons;

continue;
}

if (smiles[position] == '$') { // recursive SMARTS
if (!isNot)
throw new Exception("SmilesParser: non-negated recursive SMARTS relating to preceding atom are not supported yet. Position:"+position);

position += parseRecursiveGroup(smiles, position, getExcludeGroupList());
if (excludeGroupList == null)
excludeGroupList = new ArrayList<>();

position += parseRecursiveGroup(smiles, position, excludeGroupList);
continue;
}

Expand All @@ -504,19 +499,28 @@ else if (!range.isRange())
continue;
}

if (allowSmarts && (smiles[position] == ',' && isRepeatedAllowedORFeature(smiles, position, skipCount))) { // we allow OR-logic for some query options if they have the same type
if (allowSmarts && smiles[position] == ',' && isRepeatedAllowedORFeature(smiles, position, skipCount)) { // we allow OR-logic for some query options if they have the same type
smartsFeatureFound = true;
position += skipCount[0] + 1;
continue;
}

if (allowSmarts && smiles[position] == ',' && (mMode & SmilesParser.MODE_ENUMERATE_SMARTS) != 0) {
smartsFeatureFound = true;
position += 1;
break;
}

if (smiles[position] == ',')
throw new Exception("SmilesParser: alternative atom definitions not supported. (Tip: enumerate SMARTS): '"+(char)smiles[position]+"', position:"+position);

throw new Exception("SmilesParser: unexpected character inside brackets: '"+(char)smiles[position]+"', position:"+position);
}

return position;
}

protected boolean parseAtomLabelInBrackets(byte[] smiles, int position, int endIndex, AtomLabelInfo info) throws Exception {
private boolean parseAtomLabelInBrackets(byte[] smiles, int position, int endIndex, AtomLabelInfo info) throws Exception {
info.mayBeAromatic = true;
info.mayBeAliphatic = true;
if (smiles[position] == '#') {
Expand All @@ -538,12 +542,30 @@ protected boolean parseAtomLabelInBrackets(byte[] smiles, int position, int endI
if (smiles[position] >= 'A' && smiles[position] <= 'Z') {
info.labelLength = (smiles[position+1] >= 'a' && smiles[position+1] <= 'z') ? 2 : 1;
info.atomicNo = Molecule.getAtomicNoFromLabel(new String(smiles, position, info.labelLength, StandardCharsets.UTF_8));
if (info.labelLength == 2 && info.atomicNo == 0) {
info.labelLength = 1;
info.atomicNo = Molecule.getAtomicNoFromLabel(new String(smiles, position, info.labelLength, StandardCharsets.UTF_8));
}
info.mayBeAromatic = false;
if (info.atomicNo == 0)
throw new Exception("SmilesParser: Unknown atom label. position:"+(position-1));
return true;
}

if (smiles[position] >= 'a' && smiles[position] <= 'z') {
info.labelLength = (smiles[position+1] >= 'a' && smiles[position+1] <= 'z') ? 2 : 1;
if ((smiles[position] == 'A' && smiles[position+1] == 's')
|| (smiles[position] == 'S' && smiles[position+1] == 'e')) {
info.labelLength = 2;
info.atomicNo = Molecule.getAtomicNoFromLabel(new String(smiles, position, info.labelLength, StandardCharsets.UTF_8));
info.mayBeAliphatic = false;
return true;
}

if (smiles[position] == 'c'
|| smiles[position] == 'n'
|| smiles[position] == 'o'
|| smiles[position] == 'p'
|| smiles[position] == 's') {
info.labelLength = 1;
info.atomicNo = Molecule.getAtomicNoFromLabel(new String(smiles, position, info.labelLength, StandardCharsets.UTF_8));
info.mayBeAliphatic = false;
return true;
Expand Down Expand Up @@ -646,7 +668,10 @@ else if (smiles[endIndex] == ')')
throw new Exception("SmilesParser: Missing closing ')' for recursive SMARTS. '('-position:"+(dollarIndex+1));

StereoMolecule group = new StereoMolecule(16, 16);
new SmilesParser(mMode).parse(group, smiles, dollarIndex+2, endIndex-1);
group.setFragment(true);
SmilesParser parser = new SmilesParser(mMode);
parser.setEnumerationPositionList(mParentParser.getEnumerationPositionList());
parser.parse(group, smiles, dollarIndex+2, endIndex-1);
groupList.add(group);

if (smiles[dollarIndex-1] == '!')
Expand Down Expand Up @@ -705,8 +730,6 @@ public boolean atomQueryFeaturesFound() {
}

/**
 * Returns the list of exclude groups, creating an empty list on first access,
 * so that this method never returns {@code null}.
 *
 * @return the (possibly empty) exclude group list held by this parser
 */
public ArrayList<StereoMolecule> getExcludeGroupList() {
	if (excludeGroupList != null)
		return excludeGroupList;

	excludeGroupList = new ArrayList<>();
	return excludeGroupList;
}

Expand Down
Loading

0 comments on commit 4ff0703

Please sign in to comment.