From 3e8ac2edad2f247e8d0f23c320791fd7d7b0b47b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Zasso?= Date: Fri, 19 Jan 2024 15:07:28 +0100 Subject: [PATCH] feat: update OCL to v2024.1.1 (#186) --- __tests__/__snapshots__/conformers.js.snap | 60 +- .../__snapshots__/force-field-mmff94.js.snap | 30 +- __tests__/__snapshots__/library.js.snap | 1 + openchemlib | 2 +- scripts/openchemlib/classes.js | 2 +- .../research/chem/AbstractDepictor.java | 25 +- .../com/actelion/research/chem/Canonizer.java | 10 +- .../research/chem/ExtendedDepictor.java | 6 +- .../research/chem/ExtendedMolecule.java | 277 +++++-- ...DCodeParserWithoutCoordinateInvention.java | 44 +- .../com/actelion/research/chem/Molecule.java | 213 +++-- .../actelion/research/chem/Molecule3D.java | 2 +- .../com/actelion/research/chem/Mutator.java | 10 +- .../actelion/research/chem/SSSearcher.java | 2 + .../research/chem/SSSearcherWithIndex.java | 12 +- .../actelion/research/chem/SmilesParser.java | 174 +++-- .../research/chem/StereoMolecule.java | 4 +- .../chem/StructureSearchSpecification.java | 26 +- .../research/chem/conf/ConformerSet.java | 3 +- .../chem/coords/CoordinateInventor.java | 9 +- .../AbstractDescriptorHandlerFP.java | 7 +- .../AbstractDescriptorHandlerLongFP.java | 7 +- .../chem/descriptor/DescriptorEncoder.java | 48 +- .../chem/descriptor/DescriptorHandler.java | 4 +- .../DescriptorHandlerIntVector.java | 4 +- .../DescriptorHandlerSkeletonSpheres.java | 3 +- .../chem/io/CompoundTableConstants.java | 1 + .../mcs/ExhaustiveFragmentGeneratorBonds.java | 7 +- .../chem/mcs/RunBondVector2IdCode.java | 2 +- .../{ => prediction}/PropertyCalculator.java | 7 +- .../prediction/TotalSurfaceAreaPredictor.java | 3 +- .../chem/reaction/ReactionEncoder.java | 17 +- .../chem/sar/CoreBasedSARAnalyzer.java | 606 +++++++++++++++ .../research/chem/sar/ExitVector.java | 88 +++ .../research/chem/sar/SARMolecule.java | 195 +++++ .../research/chem/sar/SARScaffold.java | 730 ++++++++++++++++++ 
.../research/chem/sar/SARScaffoldGroup.java | 103 +++ .../research/gui/LookAndFeelHelper.java | 9 +- .../gui/editor/GenericEditorArea.java | 76 +- .../gui/editor/GenericEditorToolbar.java | 4 +- .../gui/ChemistryGeometryHelper.java} | 34 +- .../research/share/gui/editor/Model.java | 5 +- .../actelion/research/util/ArrayUtils.java | 10 + .../util/EncoderFloatingPointNumbers.java | 4 +- .../research/util/EncoderIntegerNumbers.java | 4 +- .../com/actelion/research/util/ListUtils.java | 13 + .../research/util/datamodel/IntArray.java | 2 +- .../datamodel/table/TableModelString.java | 19 +- .../chem/conf/gen/ConformerGenerator.java | 17 +- .../chem/conf/gen/RigidFragmentCache.java | 3 +- .../chem/conf/gen/RigidFragmentProvider.java | 2 +- .../conf/so/ConformationSelfOrganizer.java | 71 +- .../gwt/core/JSMoleculeProperties.java | 2 +- .../gwt/gui/editor/GWTGeomFactory.java | 4 +- .../research/gwt/minimal/JSMolecule.java | 1 + types.d.ts | 1 + 56 files changed, 2595 insertions(+), 430 deletions(-) rename src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/{ => prediction}/PropertyCalculator.java (91%) create mode 100644 src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/sar/CoreBasedSARAnalyzer.java create mode 100644 src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/sar/ExitVector.java create mode 100644 src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/sar/SARMolecule.java create mode 100644 src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/sar/SARScaffold.java create mode 100644 src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/sar/SARScaffoldGroup.java rename src/com/actelion/research/gwt/chemlib/com/actelion/research/{chem/ChemistryHelper.java => share/gui/ChemistryGeometryHelper.java} (94%) diff --git a/__tests__/__snapshots__/conformers.js.snap b/__tests__/__snapshots__/conformers.js.snap index e19bcdae..5a67ed4d 100644 --- a/__tests__/__snapshots__/conformers.js.snap +++ 
b/__tests__/__snapshots__/conformers.js.snap @@ -5,21 +5,21 @@ exports[`ConformerGenerator should generate conformers 1`] = ` Actelion Java MolfileCreator 1.0 15 14 0 0 0 0 0 0 0 0999 V2000 - -4.5770 2.5000 -2.1185 C 0 0 0 0 0 0 0 0 0 0 0 0 - -3.9333 3.2322 -3.4776 O 0 0 0 0 0 0 0 0 0 0 0 0 - -2.8935 2.1799 -4.2575 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.2507 2.2660 -3.4812 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.3324 1.6314 -1.9352 O 0 0 0 0 0 0 0 0 0 0 0 0 - -1.4253 -0.0302 -2.0151 N 0 0 0 0 0 0 0 0 0 0 0 0 - -5.2007 1.5768 -2.4320 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.7235 2.1678 -1.4110 H 0 0 0 0 0 0 0 0 0 0 0 0 - -5.2609 3.2524 -1.5646 H 0 0 0 0 0 0 0 0 0 0 0 0 - -2.8052 2.4732 -5.3736 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.3008 1.1001 -4.1756 H 0 0 0 0 0 0 0 0 0 0 0 0 - -0.9084 3.3711 -3.4304 H 0 0 0 0 0 0 0 0 0 0 0 0 - -0.4952 1.6598 -4.1123 H 0 0 0 0 0 0 0 0 0 0 0 0 - -1.7255 -0.4000 -1.1042 H 0 0 0 0 0 0 0 0 0 0 0 0 - -0.5034 -0.4157 -2.2514 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6038 1.1775 -1.7436 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.6164 -0.1245 -1.4739 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1.7449 -1.4436 -0.9286 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5103 -1.3101 0.8709 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.4916 -0.0258 1.2038 O 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0764 -0.4433 0.8311 N 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8502 0.9124 -2.5818 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2308 2.0933 -2.0679 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0263 1.4325 -0.7736 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3222 -2.4184 -1.1672 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7122 -1.4701 -1.4491 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5348 -1.1378 1.3795 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0432 -2.2915 1.2686 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.6645 0.3987 0.8315 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.4292 -1.1042 1.5348 H 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 0 0 0 2 3 1 0 0 0 0 3 4 1 0 0 0 0 @@ -43,21 +43,21 @@ exports[`ConformerGenerator should generate conformers 2`] = ` Actelion Java MolfileCreator 1.0 15 14 0 0 0 0 0 0 0 0999 V2000 - -5.4499 3.1652 -4.1799 C 0 0 0 0 0 0 0 0 0 0 0 0 - -3.9333 3.2322 -3.4776 O 0 0 0 0 0 0 0 0 0 
0 0 0 - -2.8935 2.1799 -4.2575 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.2507 2.2660 -3.4812 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.5676 3.7618 -3.7899 O 0 0 0 0 0 0 0 0 0 0 0 0 - 0.9379 3.8478 -3.0815 N 0 0 0 0 0 0 0 0 0 0 0 0 - -5.3836 3.5091 -5.2830 H 0 0 0 0 0 0 0 0 0 0 0 0 - -5.8544 2.0819 -4.1363 H 0 0 0 0 0 0 0 0 0 0 0 0 - -6.1706 3.8654 -3.6045 H 0 0 0 0 0 0 0 0 0 0 0 0 - -2.8052 2.4732 -5.3736 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.3008 1.1001 -4.1756 H 0 0 0 0 0 0 0 0 0 0 0 0 - -0.5761 1.4326 -3.9186 H 0 0 0 0 0 0 0 0 0 0 0 0 - -1.3564 2.1134 -2.3401 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.2462 4.8280 -3.0515 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1.6088 3.2972 -3.6302 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8495 -0.2172 -3.1263 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.6164 -0.1245 -1.4739 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1.7449 -1.4436 -0.9286 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5103 -1.3101 0.8709 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.9853 -1.4939 1.6379 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7789 -1.3729 3.2856 N 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4828 -1.1545 -3.3742 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4061 0.7284 -3.4912 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.8269 -0.2910 -3.6630 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3222 -2.4184 -1.1672 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7122 -1.4701 -1.4491 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7806 -2.1350 1.2247 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0672 -0.2707 1.1218 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6949 -1.2726 3.7391 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3186 -2.2231 3.6343 H 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 0 0 0 2 3 1 0 0 0 0 3 4 1 0 0 0 0 diff --git a/__tests__/__snapshots__/force-field-mmff94.js.snap b/__tests__/__snapshots__/force-field-mmff94.js.snap index 591f7fc0..59cee5d8 100644 --- a/__tests__/__snapshots__/force-field-mmff94.js.snap +++ b/__tests__/__snapshots__/force-field-mmff94.js.snap @@ -5,21 +5,21 @@ exports[`ForceFieldMMFF94 should generate force field 1`] = ` Actelion Java MolfileCreator 1.0 15 14 0 0 0 0 0 0 0 0999 V2000 - -4.3423 2.6153 -2.3397 C 0 0 0 0 0 0 0 0 0 0 0 0 - -3.8783 2.9630 -3.6364 O 0 0 0 0 0 0 0 0 0 0 0 0 - -2.8566 2.0874 -4.1292 
C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.4746 2.4010 -3.5536 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.0233 3.6899 -3.9928 O 0 0 0 0 0 0 0 0 0 0 0 0 - -0.2808 3.5370 -5.2408 N 0 0 0 0 0 0 0 0 0 0 0 0 - -4.7361 1.5945 -2.3324 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.5480 2.7187 -1.5962 H 0 0 0 0 0 0 0 0 0 0 0 0 - -5.1522 3.2997 -2.0724 H 0 0 0 0 0 0 0 0 0 0 0 0 - -2.8395 2.2197 -5.2160 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.1330 1.0464 -3.9263 H 0 0 0 0 0 0 0 0 0 0 0 0 - -0.7528 1.6241 -3.8346 H 0 0 0 0 0 0 0 0 0 0 0 0 - -1.5055 2.4375 -2.4612 H 0 0 0 0 0 0 0 0 0 0 0 0 - 0.5834 4.0318 -5.0129 H 0 0 0 0 0 0 0 0 0 0 0 0 - -0.8047 4.1683 -5.8509 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.7685 0.9053 -1.5976 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4013 -0.3647 -1.5301 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6128 -1.3588 -0.8641 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6782 -1.2484 0.6601 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.0034 -1.5294 1.1322 O 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1221 -2.9596 1.4011 N 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8075 0.8298 -2.1152 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4153 1.5787 -2.1667 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6276 1.3287 -0.5999 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0142 -2.3277 -1.1788 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.5751 -1.3011 -1.2119 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9515 -1.9235 1.1286 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4393 -0.2333 0.9888 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4846 -2.9468 2.3562 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9205 -3.1978 0.8087 H 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 0 0 0 2 3 1 0 0 0 0 3 4 1 0 0 0 0 diff --git a/__tests__/__snapshots__/library.js.snap b/__tests__/__snapshots__/library.js.snap index 13639bcd..2199644b 100644 --- a/__tests__/__snapshots__/library.js.snap +++ b/__tests__/__snapshots__/library.js.snap @@ -650,6 +650,7 @@ exports[`static properties of Molecule 1`] = ` "cESRTypeAbs", "cESRTypeAnd", "cESRTypeOr", + "cHelperAll", "cHelperBitCIP", "cHelperBitIncludeNitrogenParities", "cHelperBitNeighbours", diff --git a/openchemlib b/openchemlib index f7678684..9c51e6ad 160000 --- a/openchemlib +++ b/openchemlib @@ -1 +1 @@ -Subproject commit 
f767868487998719be4dd09d52df499cd3d60721 +Subproject commit 9c51e6ad8a323e3ff7ce0c0a77d28f97709e8c32 diff --git a/scripts/openchemlib/classes.js b/scripts/openchemlib/classes.js index 88dc3bab..6464942a 100644 --- a/scripts/openchemlib/classes.js +++ b/scripts/openchemlib/classes.js @@ -33,7 +33,6 @@ const changed = [ changeSelfOrganizedConformer, ], ['@org/openmolecules/chem/conf/gen/RigidFragmentCache', removeCacheIO], - ['chem/ChemistryHelper', removePrintf], ['chem/Coordinates', removeToStringSpaceDelimited], ['chem/coords/InventorFragment', changeInventorFragment], ['chem/conf/BondLengthSet', changeBondLengthSet], @@ -53,6 +52,7 @@ const changed = [ ['chem/TextDrawingObject', changeTextDrawingObject], ['gui/editor/GenericEditorArea', changeGenericEditorArea], ['gui/editor/CustomAtomDialogBuilder', changeCustomAtomDialogBuilder], + ['share/gui/ChemistryGeometryHelper', removePrintf], ['share/gui/editor/Model', removePrintf], ['util/ArrayUtils', changeArrayUtils], ['util/datamodel/IntVec', changeIntVec], diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/AbstractDepictor.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/AbstractDepictor.java index e8237b36..e64bfeb5 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/AbstractDepictor.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/AbstractDepictor.java @@ -155,7 +155,7 @@ public abstract class AbstractDepictor { private boolean[] mAtomIsConnected; private boolean[] mAtomLabelDisplayed; private double mpBondSpacing,mpDotDiameter,mpLineWidth,mpQFDiameter,mpBondHiliteRadius, - mFactorTextSize,mpExcludeGroupRadius,mChiralTextSize; + mFactorTextSize,mpExcludeGroupRadius,mChiralTextSize,mAtomLabelAVBL; private int mpLabelSize,mStandardForegroundColor,mDisplayMode,mCurrentColor,mPreviousColor; private boolean mIsValidatingView; private ArrayList mpTabuZone; @@ -274,6 +274,19 @@ public void 
setTransformation(DepictorTransformation t) { } + /** + * Per default the size of the atom label font depends on the average bond length, + * which usually is determined before a molecule is drawn. + * This method allows to override the mechanism and to define an artificial + * "average bond length" that is used instead of the real one to serve as basis + * for the scale of atom labels, double bond distance, chiral text, etc... + * @param avbl + */ + public void setAtomLabelAVBL(double avbl) { + mAtomLabelAVBL = avbl / mTransformation.getScaling(); + } + + /** * Sets a multiplication factor to the text size of all labels. The default is 1.0. * @param factor text size factor @@ -403,8 +416,12 @@ public DepictorTransformation validateView(T context, GenericRectangle viewRect, } + /** + * @param viewRect + * @param mode + * @return incremental transformation that moves/scales already transformed molecule into viewRect + */ public DepictorTransformation simpleValidateView(GenericRectangle viewRect, int mode) { - // returns incremental transformation that moves/scales already transformed molecule into viewRect if (mMol.getAllAtoms() == 0) return null; @@ -576,12 +593,12 @@ private void updateBondHiliteColor() { private void calculateParameters() { - double averageBondLength = mTransformation.getScaling() * mMol.getAverageBondLength(); + double averageBondLength = mTransformation.getScaling() * (mAtomLabelAVBL != 0 ? 
mAtomLabelAVBL : mMol.getAverageBondLength()); mpLineWidth = averageBondLength * cFactorLineWidth; mpBondSpacing = averageBondLength * cFactorBondSpacing; mpBondHiliteRadius = averageBondLength * cFactorBondHiliteRadius; mpExcludeGroupRadius = averageBondLength * cFactorExcludeGroupRadius; - mpLabelSize = (int)(averageBondLength * mFactorTextSize * cFactorTextSize + 0.5); + mpLabelSize = (int)(averageBondLength * mFactorTextSize * cFactorTextSize + 0.5); mpDotDiameter = averageBondLength * cFactorDotDiameter; mpQFDiameter = averageBondLength * cFactorQFDiameter; mChiralTextSize = averageBondLength * cFactorChiralTextSize + 0.5f; diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java index 74083fd8..386b9775 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java @@ -2424,6 +2424,10 @@ public StereoMolecule getCanMolecule() { } + /** + * @param includeExplicitHydrogen + * @return canonical copy of this molecule + */ public StereoMolecule getCanMolecule(boolean includeExplicitHydrogen) { generateGraph(); @@ -2467,7 +2471,7 @@ else if (mol.getBondType(bond) == Molecule.cBondTypeDown) } mMol.copyMoleculeProperties(mol); - mMol.invalidateHelperArrays(Molecule.cHelperBitParities); + mol.invalidateHelperArrays(Molecule.cHelperBitParities); return mol; } @@ -3961,7 +3965,7 @@ private void cipCalcTHParity(int atom) { || mTHParity[atom] == Molecule.cAtomParity2)) { boolean invertedOrder = false; - if (mMol.getAtomPi(atom) == 2) { // allene + if (mMol.getAtomPi(atom) == 2 && mMol.getConnAtoms(atom) == 2) { // allene try { for (int i=0; i<2; i++) { int alleneAtom = mMol.getConnAtom(atom,i); @@ -4132,7 +4136,7 @@ private boolean cipComparePriority(int rootAtom, int atom1, int atom2) throws Ex delocalizedBondCount++; delocalizedMeanAtomicNo 
+= mMol.getAtomicNo(candidate); } - else { + else if (candidate != rootAtom) { // treat double bond at rootAtom stereo center as single bond // add pseudo atoms for double and triple bonds for (int j=1; j 1) { + for (int i=0; i 1) { + for (int j=0; j 3 + && isAtomStereoCenter(atom)) { + int remainingNeighbours = 0; + int lostStereoBond = -1; + int lostAtom = -1; + for (int i=0; i=atom || atomMap.length>=mConnAtom[atom][i]) +// System.out.println("mConnAtom.length:"+mConnAtom.length+" atom:"+atom+" atomMap.length:"+atomMap.length+" i:"+i+" mConnAtom[atom][i]:"+mConnAtom[atom][i]+" mAtoms:"+mAtoms+" mAllAtoms:"+mAllAtoms); + if (atomMap.length>mConnAtom[atom][i] + && atomMap[mConnAtom[atom][i]] != -1) + remainingNeighbours++; + else if (mConnBondOrder[atom][i] == 1 + && isStereoBond(mConnBond[atom][i]) + && mBondAtom[0][mConnBond[atom][i]] == atom) { + lostStereoBond = mConnBond[atom][i]; + lostAtom = mConnAtom[atom][i]; + } + } + if (lostStereoBond != -1 + && remainingNeighbours >= 3) { + double angle = getBondAngle(atom, lostAtom); + double minAngleDif = 10.0; + int minConnBond = -1; + for (int i=0; imConnAtom[atom][i] + && atomMap[mConnAtom[atom][i]] != -1) { + double angleDif = Math.abs(getAngleDif(angle, getBondAngle(atom, mConnAtom[atom][i]))); + if (minAngleDif > angleDif) { + minAngleDif = angleDif; + minConnBond = mConnBond[atom][i]; + } + } + } + if (minConnBond != -1) { + int destBond = bondMap[minConnBond]; + destMol.setBondType(destBond, mBondType[minConnBond] == cBondTypeUp ? cBondTypeDown : cBondTypeUp); + if (mBondAtom[0][minConnBond] != atom) { + destMol.setBondAtom(1, destBond, atomMap[mBondAtom[0][minConnBond]]); + destMol.setBondAtom(0, destBond, atomMap[atom]); + } + } + } + } + } + } + + + private void rescueImplicitHigherValences(ExtendedMolecule destMol, int sourceAtomCount, int[] atomMap) { + destMol.ensureHelperArrays(Molecule.cHelperNeighbours); + for (int atom=0; atom * 1. 
non-hydrogen atoms (bond order 1 and above) and unusual hydrogen atoms (non-natural abundance isotopes, custom labelled hydrogen, etc.)
@@ -593,7 +721,6 @@ public int getFreeValence(int atom) { return getMaxValence(atom) - getOccupiedValence(atom); } - /** * The lowest free valence is the number of potential additional single bonded * neighbours to reach the atom's lowest valence above or equal its current @@ -601,13 +728,27 @@ public int getFreeValence(int atom) { * Thus, the phosphor atoms in PF2 and PF4 both have a lowest free valence of 1. * The oxygen in R-O(-) has a lowest free valence of 0, the nitrogen in R3N(+) * has a free valence of 1. If you need the maximum possible free valence, - * use getFreeValence(), which would give 6 for Cl(-) and HCl. + * use getFreeValence(), which would give 6 for Cl(-) and HCl.
+ * Of course, the lowest free valence depends on the atomic number. If this molecule + * is a fragment and if an atom list is associated with this atom, then the lowest free + * valence is calculated for all atomic numbers in the list and the highest of them is returned. * @param atom * @return */ public int getLowestFreeValence(int atom) { + if (!mIsFragment || mAtomList == null || mAtomList[atom] == null) + return getLowestFreeValence(atom, mAtomicNo[atom]); + + int valence = 0; + for (int atomicNo:mAtomList[atom]) + valence = Math.max(valence, getLowestFreeValence(atom, atomicNo)); + + return valence; + } + + protected int getLowestFreeValence(int atom, int atomicNo) { int occupiedValence = getOccupiedValence(atom); - int correction = getElectronValenceCorrection(atom, occupiedValence); + int correction = getElectronValenceCorrection(atom, occupiedValence, atomicNo); int valence = getAtomAbnormalValence(atom); if (valence == -1) { @@ -930,9 +1071,34 @@ public int[] getFragmentAtoms(int rootAtom) { * @return atoms being in the same fragment as rootAtom */ public int[] getFragmentAtoms(int rootAtom, boolean considerMetalBonds) { + boolean[] isFragmentMember = isFragmentMember = new boolean[mAllAtoms]; + int fragmentMembers = getFragmentAtoms(rootAtom, considerMetalBonds, isFragmentMember); + + int[] fragmentMember = new int[fragmentMembers]; + fragmentMembers = 0; + for (int atom=0; atom * - We also find the path length from the touch point on the smallest ring back to atom.
* - Using heuristics we decide with this information, whether the ring system prevents a flat geometry of atom. + * e.g.: Catalytic Asymmetric Synthesis of Tröger’s Base Analogues with Nitrogen Stereocenter + * Chun Ma, Yue Sun, Junfeng Yang, Hao Guo, and Junliang Zhang + * ACS Central Science 2023 9 (1), 64-71 + * DOI: 10.1021/acscentsci.2c01121 * @param atom * @return true, if the attached ring system prevents a flat geometry of atom */ @@ -2692,12 +2865,11 @@ && getPathLength(pathAtom[1], potentialOtherBridgeHead, 2, null) == 2) && getAtomicNo(bridgeHead) == 7 && getAtomCharge(bridgeHead) != 1; - if (bondCountToBridgeHead == 1 - && !bridgeHeadIsFlat - && !bridgeHeadMayInvert - && smallestRingSize <= 4 - && bridgeAtomCount <= 3) - return true; + if (bondCountToBridgeHead == 1) + return !bridgeHeadIsFlat + && !bridgeHeadMayInvert + && smallestRingSize <= 4 + && bridgeAtomCount <= 3; switch (smallestRingSize) { // case 3 is fully handled @@ -3331,9 +3503,9 @@ public void ensureHelperArrays(int required) { return; if ((mValidHelperArrays & ~(cHelperBitRingsSimple | cHelperBitRings)) != 0) { - for (int atom = 0; atom < mAtoms; atom++) + for (int atom=0; atom 0 && index < idcode.length()-1) - parse(mol, idcode.substring(0, index).getBytes(), idcode.substring(index+1).getBytes()); + parse(mol, idcode.substring(0, index).getBytes(StandardCharsets.UTF_8), idcode.substring(index+1).getBytes(StandardCharsets.UTF_8)); else - parse(mol, idcode.getBytes(), null); + parse(mol, idcode.getBytes(StandardCharsets.UTF_8), null); } /** @@ -177,8 +179,8 @@ public void parse(StereoMolecule mol, byte[] idcode) { * @param coordinates may be null */ public void parse(StereoMolecule mol, String idcode, String coordinates) { - byte[] idcodeBytes = (idcode == null) ? null : idcode.getBytes(); - byte[] coordinateBytes = (coordinates == null) ? null : coordinates.getBytes(); + byte[] idcodeBytes = (idcode == null) ? 
null : idcode.getBytes(StandardCharsets.UTF_8); + byte[] coordinateBytes = (coordinates == null) ? null : coordinates.getBytes(StandardCharsets.UTF_8); parse(mol, idcodeBytes, coordinateBytes); } @@ -201,7 +203,7 @@ public void parse(StereoMolecule mol, byte[] idcode, byte[] coordinates) { * Parses the idcode and populates the given molecule to represent the passed idcode. * @param mol molecule object to be filled with the idcode content * @param idcode may be null - * @param idcodeStart first byte index of idcode + * @param idcodeStart offset in idcode array to first idcode byte */ public void parse(StereoMolecule mol, byte[] idcode, int idcodeStart) { parse(mol, idcode, null, idcodeStart, -1); @@ -212,8 +214,8 @@ public void parse(StereoMolecule mol, byte[] idcode, int idcodeStart) { * @param mol molecule object to be filled with the idcode content * @param idcode may be null * @param coordinates may be null - * @param idcodeStart first byte index of idcode - * @param coordsStart first byte indexif coordinates + * @param idcodeStart offset in idcode array to first idcode byte + * @param coordsStart offset in coordinates array to first coords byte */ public void parse(StereoMolecule mol, byte[] idcode, byte[] coordinates, int idcodeStart, int coordsStart) { mol.clear(); @@ -588,7 +590,7 @@ public void parse(StereoMolecule mol, byte[] idcode, byte[] coordinates, int idc byte[] label = new byte[count]; for (int j=0; j= this are not considered * @param bonds bond indexes >= this are not considered * @param defaultBondLength - * @param coords may be a second set of the molecule's coordinates, e.g. from a Conformer + * @param coords to be used, either the molecule's coordinates, or an alternative, e.g. 
from a Conformer * @return */ public double getAverageBondLength(int atoms, int bonds, double defaultBondLength, Coordinates[] coords) { @@ -2384,6 +2472,7 @@ public double getAverageBondLength(int atoms, int bonds, double defaultBondLengt && (mBondQueryFeatures[bond] & cBondQFBridge) == 0) avblSum += coords[mBondAtom[1][bond]].distance(coords[mBondAtom[0][bond]]); } + return avblSum / consideredBonds; } @@ -3136,7 +3225,7 @@ public void setAtomMapNo(int atom, int mapNo, boolean autoMapped) { */ public void setAtomMass(int atom, int mass) { mAtomMass[atom] = mass; - mValidHelperArrays &= cHelperRings; + mValidHelperArrays &= (mAtomicNo[atom] == 1) ? cHelperNone : cHelperRings; } @@ -3158,7 +3247,9 @@ public void setAtomMass(int atom, int mass) { * @param isPseudo true if the configuration is only meaningful relative to another one */ public void setAtomParity(int atom, int parity, boolean isPseudo) { - mAtomFlags[atom] &= ~(cAtomFlagsParity | cAtomParityIsPseudo | cAtomFlagConfigurationUnknown); + mAtomFlags[atom] &= ~(cAtomFlagsParity | cAtomParityIsPseudo); + if (parity != cAtomParityUnknown) + mAtomFlags[atom] &= ~cAtomFlagConfigurationUnknown; mAtomFlags[atom] |= parity; if (isPseudo) mAtomFlags[atom] |= cAtomParityIsPseudo; @@ -3485,7 +3576,7 @@ public void setAtomCustomLabel(int atom, String label) { else { if (mAtomCustomLabel == null) mAtomCustomLabel = new byte[mMaxAtoms][]; - mAtomCustomLabel[atom] = label.getBytes(); + mAtomCustomLabel[atom] = label.getBytes(StandardCharsets.UTF_8); } } @@ -3708,6 +3799,18 @@ public void scaleCoords(double f) { } + public void rotateCoords(double x, double y, double angle) { + double sin = Math.sin(angle); + double cos = Math.cos(angle); + for (int atom=0; atom= 171 && mAtomicNo[atom] <= 190) + return getElectronValenceCorrection(atom, occupiedValence, mAtomicNo[atom]); + } + + protected int getElectronValenceCorrection(int atom, int occupiedValence, int atomicNo) { + if (atomicNo >= 171 && atomicNo <= 190) return 0; int 
correction = 0; @@ -3937,32 +4044,32 @@ public int getElectronValenceCorrection(int atom, int occupiedValence) { if ((mAtomQueryFeatures[atom] & cAtomQFCharge) == cAtomQFNotCharge0+cAtomQFNotChargeNeg) charge = 1; } - if (mAtomicNo[atom] == 7 // N - || mAtomicNo[atom] == 8 // O - || mAtomicNo[atom] == 9) // F + if (atomicNo == 7 // N + || atomicNo == 8 // O + || atomicNo == 9) // F correction += charge; - else if (mAtomicNo[atom] == 6 // C - || mAtomicNo[atom] == 14 // Si - || mAtomicNo[atom] == 32) // Ge + else if (atomicNo == 6 // C + || atomicNo == 14 // Si + || atomicNo == 32) // Ge correction -= Math.abs(charge); - else if (mAtomicNo[atom] == 15 // P - || mAtomicNo[atom] == 33) { // As + else if (atomicNo == 15 // P + || atomicNo == 33) { // As if (occupiedValence - correction - charge <= 3) correction += charge; else correction -= charge; } - else if (mAtomicNo[atom] == 16 // S - || mAtomicNo[atom] == 34 // Se - || mAtomicNo[atom] == 52) { // Te + else if (atomicNo == 16 // S + || atomicNo == 34 // Se + || atomicNo == 52) { // Te if (occupiedValence - correction - charge <= 4) correction += charge; else correction -= Math.abs(charge); } - else if (mAtomicNo[atom] == 17 // Cl - || mAtomicNo[atom] == 35 // Br - || mAtomicNo[atom] == 53) { // I + else if (atomicNo == 17 // Cl + || atomicNo == 35 // Br + || atomicNo == 53) { // I if (occupiedValence - correction - charge <= 5) correction += charge; else @@ -4084,7 +4191,7 @@ public boolean isTransitionMetalAtom(int atom) { if (mAtomList != null && mAtomList[atom] != null) for (int atomicNo:mAtomList[atom]) - if (!isAtomicNoMetal(atomicNo)) + if (!isAtomicNoTransitionMetal(atomicNo)) return false; } @@ -4098,7 +4205,7 @@ public static boolean isAtomicNoMetal(int atomicNo) { || (atomicNo >= 19 && atomicNo <= 31) || (atomicNo >= 37 && atomicNo <= 51) || (atomicNo >= 55 && atomicNo <= 84) - || (atomicNo >= 87 && atomicNo <= 103); + || (atomicNo >= 87 && atomicNo <= 112); } diff --git 
a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Molecule3D.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Molecule3D.java index 5ef902c7..c0e6ac42 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Molecule3D.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Molecule3D.java @@ -387,7 +387,7 @@ else if(o instanceof Integer) else if(o instanceof Double) newObject = new Double((Double)o); else { - System.out.println("ERROR: unexpected Object type. Add support for new type: "+o.toString()); + System.out.println("ERROR: unexpected Object type. Add support for new type: "+o); } } return newObject; diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Mutator.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Mutator.java index 64a50252..86b8d23f 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Mutator.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Mutator.java @@ -1256,10 +1256,13 @@ else if (mol.getConnAtom(rootAtom, i) == atom2) repairCharges(mol); + // Most of the parity flags are still valid; the CoordinateInventor creates up/down bonds from them + mol.setParitiesValid(0); new CoordinateInventor().invent(mol); - mol.setStereoBondsFromParity(); // rescue old parity information, where it is still correct - mol.ensureHelperArrays(Molecule.cHelperParities); // detect over/under-specified stereo information + // we need to invalidate to detect all parities correctly now + mol.invalidateHelperArrays(Molecule.cHelperAll); + repairStereoChemistry(mol); // assign random parities to new stereo centers, and change up/down accordingly } @@ -1537,6 +1540,7 @@ private void repairCharges(StereoMolecule mol) { } private void repairStereoChemistry(StereoMolecule mol) { + mol.ensureHelperArrays(Molecule.cHelperParities); // detect over/under-specified stereo information for (int bond=0; 
bond getMatchList() { * getMatchList() doesn't include information about atoms, which are part of a matching bridge bond. * This method returns an atom mask for a given matchNo, where all atoms are flagged that are part of a * matching bridge bond within that match. + * If multiple bridges bond matches are possible, for every bridge bond only the shortest bridge is considered. + * Multiple bridge matches don't contribute to the multiplicity of match lists, nor are they considered else where. * @param matchNo index of corresponding match from getMatchList() * @return null or atom mask in target atom space */ diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SSSearcherWithIndex.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SSSearcherWithIndex.java index b58cbe15..0a50e538 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SSSearcherWithIndex.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SSSearcherWithIndex.java @@ -36,6 +36,8 @@ import com.actelion.research.chem.descriptor.AbstractDescriptorHandlerLongFP; +import java.nio.charset.StandardCharsets; + // TODO purge mMoleculeIndexInt,mFragmentIndexInt and related methods from this class. 
Long versions were introduced Aug 3, 2018 public class SSSearcherWithIndex { @@ -607,7 +609,7 @@ public void setFragment(StereoMolecule fragment, int[] index) { @Deprecated // Use long version of this method public void setFragment(String idcode, int[] index) { - setFragment(idcode.getBytes(), index); + setFragment(idcode.getBytes(StandardCharsets.UTF_8), index); } @@ -640,7 +642,7 @@ public void setMolecule(StereoMolecule molecule, int[] index) { @Deprecated // Use long version of this method public void setMolecule(String idcode, int[] index) { - setMolecule(idcode.getBytes(), index); + setMolecule(idcode.getBytes(StandardCharsets.UTF_8), index); } @@ -671,7 +673,7 @@ public void setFragment(StereoMolecule fragment, long[] index) { public void setFragment(String idcode, long[] index) { - setFragment(idcode.getBytes(), index); + setFragment(idcode.getBytes(StandardCharsets.UTF_8), index); } @@ -701,7 +703,7 @@ public void setMolecule(StereoMolecule molecule, long[] index) { public void setMolecule(String idcode, long[] index) { - setMolecule(idcode.getBytes(), index); + setMolecule(idcode.getBytes(StandardCharsets.UTF_8), index); } @@ -1053,7 +1055,7 @@ public static String getHexStringFromIndex(int[] index) { } } - return new String(bytes); + return new String(bytes, StandardCharsets.UTF_8); } diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SmilesParser.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SmilesParser.java index 9400f0e9..8441fcfc 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SmilesParser.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SmilesParser.java @@ -39,6 +39,7 @@ import com.actelion.research.util.ArrayUtils; import com.actelion.research.util.SortedList; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.TreeMap; @@ -117,7 +118,7 @@ public void setRandomSeed(long seed) { 
} public StereoMolecule parseMolecule(String smiles) { - return smiles == null ? null : parseMolecule(smiles.getBytes()); + return smiles == null ? null : parseMolecule(smiles.getBytes(StandardCharsets.UTF_8)); } /** @@ -138,13 +139,37 @@ public StereoMolecule parseMolecule(byte[] smiles) { return mol; } + public static boolean isReactionSmiles(byte[] smiles) { + int count = 0; + int index = -1; + + while (count < 3) { + index = ArrayUtils.indexOf(smiles, (byte)'>', index + 1); + while (index>0 && smiles[index - 1] == (byte)'-') + index = ArrayUtils.indexOf(smiles, (byte)'>', index + 1); + + if (index == -1) + break; + + count++; + } + + return count == 2; + } + public Reaction parseReaction(String smiles) throws Exception { - return smiles == null ? null : parseReaction(smiles.getBytes()); - } + return smiles == null ? null : parseReaction(smiles.getBytes(StandardCharsets.UTF_8)); + } public Reaction parseReaction(byte[] smiles) throws Exception { int index1 = ArrayUtils.indexOf(smiles, (byte)'>'); + while (index1 > 0 && smiles[index1-1] == (byte)'-') + index1 = ArrayUtils.indexOf(smiles, (byte)'>', index1+1); + int index2 = (index1 == -1) ? 
-1 : ArrayUtils.indexOf(smiles, (byte)'>', index1+1); + while (index2 > 0 && smiles[index2-1] == (byte)'-') + index2 = ArrayUtils.indexOf(smiles, (byte)'>', index2+1); + if (index2 == -1) throw new Exception("Missing one or both separators ('>')."); if (ArrayUtils.indexOf(smiles, (byte)'>', index2+1) != -1) @@ -222,7 +247,7 @@ public String getSmartsWarning() { * @throws Exception */ public void parse(StereoMolecule mol, String smiles) throws Exception { - parse(mol, smiles.getBytes(), true, true); + parse(mol, smiles.getBytes(StandardCharsets.UTF_8), true, true); } public void parse(StereoMolecule mol, byte[] smiles) throws Exception { @@ -269,6 +294,7 @@ public void parse(StereoMolecule mol, byte[] smiles, int position, int endIndex, int bondQueryFeatures = 0; SortedList atomList = new SortedList<>(); SmilesRange range = new SmilesRange(smiles); + AtomInfo atomInfo = new AtomInfo(); while (smiles[position] <= 32) position++; @@ -298,17 +324,6 @@ public void parse(StereoMolecule mol, byte[] smiles, int position, int endIndex, else if (theChar == '?') { atomicNo = 0; } - else if (theChar == '#') { - int number = 0; - while (position < endIndex - && Character.isDigit(smiles[position])) { - number = 10 * number + smiles[position] - '0'; - position++; - } - if (number < 1 || number >= Molecule.cAtomLabel.length) - throw new Exception("SmilesParser: Atomic number out of range."); - atomicNo = number; - } else { boolean isNot = (theChar == '!'); if (isNot) { @@ -326,54 +341,42 @@ else if (theChar == '#') { position--; } else { - int labelLength = Character.isLowerCase(smiles[position]) ? 2 : 1; - atomicNo = Molecule.getAtomicNoFromLabel(new String(smiles, position-1, labelLength)); - if (atomicNo == -1) { - atomicNo = 6; - atomQueryFeatures |= Molecule.cAtomQFAny; - position--; - } - else { - position += labelLength - 1; - explicitHydrogens = HYDROGEN_IMPLICIT_ZERO; - - // If we have a comma after the first atom label, then we need to parse a list. 
- // In this case we also have to set aromaticity query features from upper and lower case symbols. - if (allowSmarts && (smiles[position] == ',' || isNot)) { - boolean upperCaseFound = false; - boolean lowerCaseFound = false; - int start = position - labelLength; - for (int p=start; p 1) { - explicitHydrogens = HYDROGEN_ANY; // don't use implicit zero with atom lists - if (!upperCaseFound) - atomQueryFeatures |= Molecule.cAtomQFAromatic; - else if (!lowerCaseFound) - atomQueryFeatures |= Molecule.cAtomQFNotAromatic; + getGetInBracketAtomInfo(smiles, position-1, endIndex, atomInfo); + atomicNo = atomInfo.atomicNo; + position += atomInfo.labelLength - 1; + explicitHydrogens = HYDROGEN_IMPLICIT_ZERO; + + // If we have a comma after the first atom label, then we need to parse a list. + // In this case we also have to set aromaticity query features from upper and lower case symbols. + if (allowSmarts && (smiles[position] == ',' || isNot)) { + boolean mayBeAromatic = atomInfo.mayBeAromatic; + boolean mayBeAliphatic = atomInfo.mayBeAliphatic; + int start = position - atomInfo.labelLength; + while (start < endIndex) { + getGetInBracketAtomInfo(smiles, start, endIndex, atomInfo); + atomList.add(atomInfo.atomicNo); + mayBeAromatic |= atomInfo.mayBeAromatic; + mayBeAliphatic |= atomInfo.mayBeAliphatic; + start += atomInfo.labelLength; + if (smiles[start] != ',') + break; + start++; + if (smiles[start] == '!') { + if (!isNot) + throw new Exception("SmilesParser: inconsistent '!' 
in atom list."); + start++; } + } - position = start-1; + if (atomList.size() > 1) { + explicitHydrogens = HYDROGEN_ANY; // don't use implicit zero with atom lists + if (!mayBeAliphatic) + atomQueryFeatures |= Molecule.cAtomQFAromatic; + else if (!mayBeAromatic) + atomQueryFeatures |= Molecule.cAtomQFNotAromatic; } + + position = start; } } } @@ -1116,22 +1119,6 @@ else if (!mMakeHydrogenExplicit && (smartsFeatureFound || mSmartsMode == SMARTS_ } - private int parseAtomList(byte[] smiles, int start, SortedList atomList) { - atomList.removeAll(); - for (int p=start; p= Molecule.cAtomLabel.length) + throw new Exception("SmilesParser: Atomic number out of range."); + } + else if (smiles[position] >= 'A' && smiles[position] <= 'Z') { + info.labelLength = (smiles[position+1] >= 'a' && smiles[position+1] <= 'z') ? 2 : 1; + info.atomicNo = Molecule.getAtomicNoFromLabel(new String(smiles, position, info.labelLength, StandardCharsets.UTF_8)); + info.mayBeAromatic = false; + } + else if (smiles[position] >= 'a' && smiles[position] <= 'z') { + info.labelLength = (smiles[position+1] >= 'a' && smiles[position+1] <= 'z') ? 2 : 1; + info.atomicNo = Molecule.getAtomicNoFromLabel(new String(smiles, position, info.labelLength, StandardCharsets.UTF_8)); + info.mayBeAliphatic = false; + } + else + throw new Exception("SmilesParser: Unexpected character within brackets:'"+((char)smiles[position])+"'"); + } + private int bondSymbolToQueryFeature(char symbol) { return symbol == '=' ? Molecule.cBondQFDouble : symbol == '#' ? 
Molecule.cBondQFTriple @@ -1663,6 +1680,11 @@ private boolean assignKnownEZBondParities() { return paritiesFound; } + private class AtomInfo { + boolean mayBeAromatic,mayBeAliphatic; + int atomicNo,labelLength; + } + private class ParityNeighbour { int mAtom,mPosition; diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/StereoMolecule.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/StereoMolecule.java index e9b5e5a6..eec403c1 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/StereoMolecule.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/StereoMolecule.java @@ -105,9 +105,9 @@ public void copyMoleculeProperties(Molecule destMol) { * If fragment separation is only needed, if there are multiple fragments, it may be more * efficient to run this functionality in two steps, e.g.:
* int[] fragmentNo = new int[mol.getAllAtoms()];
- * int fragmentCount = getFragmentNumbers(fragmentNo, boolean, boolean);
+ * int fragmentCount = mol.getFragmentNumbers(fragmentNo, false, false);
* if (fragmentCount > 1) {
- * StereoMolecule[] fragment = getUniqueFragmentsEstimated(int[] fragmentNo, fragmentCount);
+ * StereoMolecule[] fragment = mol.getFragments(fragmentNo, fragmentCount);
* ...
* }
* @return diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/StructureSearchSpecification.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/StructureSearchSpecification.java index fefb6d28..7148f530 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/StructureSearchSpecification.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/StructureSearchSpecification.java @@ -37,6 +37,7 @@ import com.actelion.research.chem.descriptor.DescriptorHelper; import java.io.Serializable; +import java.nio.charset.StandardCharsets; public class StructureSearchSpecification implements Serializable { static final long serialVersionUID = 0x20120402; @@ -51,7 +52,8 @@ public class StructureSearchSpecification implements Serializable { public static final int TYPE_TAUTOMER_NO_STEREO = 0x000006; public static final int TYPE_BACKBONE_NO_STEREO = 0x000007; - public static final int MODE_LARGEST_FRAGMENT_ONLY = 0x000100; + public static final int MODE_LARGEST_FRAGMENT_ONLY = 0x000100; + public static final int MODE_SINGLE_MATCH_ONLY = 0x000200; private int mSearchType; private byte[][] mIDCode; @@ -177,16 +179,36 @@ public boolean isLargestFragmentOnly() { return (mSearchType & MODE_LARGEST_FRAGMENT_ONLY) != 0; } + public boolean isSingleMatchOnly() { + return (mSearchType & MODE_SINGLE_MATCH_ONLY) != 0; + } + public void removeDescriptors() { mDescriptor = null; } + /** + * + * @param b + */ public void setLargestFragmentOnly(boolean b) { mSearchType &= ~MODE_LARGEST_FRAGMENT_ONLY; if (b) mSearchType |= MODE_LARGEST_FRAGMENT_ONLY; } + /** + * In case of a substructure search, as default a molecule is considered a match if the query + * structure is found once or multiple times. To consider only single matches a match, call this + * method with argument true. 
+ * @param b + */ + public void setSingleMatchOnly(boolean b) { + mSearchType &= ~MODE_SINGLE_MATCH_ONLY; + if (b) + mSearchType |= MODE_SINGLE_MATCH_ONLY; + } + public String getDescriptorShortName() { return mDescriptorShortName; } @@ -233,7 +255,7 @@ public String toString() { + (((mSearchType & MODE_LARGEST_FRAGMENT_ONLY) != 0) ? "/largestFragmentOnly":""); return "type:"+typeString - + (mIDCode==null?" idcodes:null":mIDCode.length==1?" idcode:"+(mIDCode[0]==null?"null":new String(mIDCode[0])):" idcodeCount:"+mIDCode.length) + + (mIDCode==null?" idcodes:null":mIDCode.length==1?" idcode:"+(mIDCode[0]==null?"null":new String(mIDCode[0], StandardCharsets.UTF_8)):" idcodeCount:"+mIDCode.length) + (mDescriptor==null?" descriptors:null":" descriptorCount:"+mDescriptor.length); } } diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/conf/ConformerSet.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/conf/ConformerSet.java index a3e21831..4303b51c 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/conf/ConformerSet.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/conf/ConformerSet.java @@ -39,6 +39,7 @@ import com.actelion.research.chem.StereoMolecule; import com.actelion.research.util.ArrayUtils; +import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.TreeSet; @@ -62,7 +63,7 @@ public ConformerSet(String s) { for(int i=2; i templateList) { /** * Creates new atom 2D-coordinates for a molecule or a part of a molecule. - * Coordinates will correctly reflect E/Z double bond parities, unless the double bond is in a small ring. + * Typically, the molecule has defined TH- and EZ-parities (even if unknown or none), which were not + * calculated, but taken from a SMILES or from an IDCode. In these cases setParitiesValid() should have + * been called to indicate that a parity calculation is not needed and even would destroy given parities. 
+ * New coordinates will correctly reflect E/Z double bond parities, unless the double bond is in a small ring. * If atom parities are available, this call is typically followed by calling mol.setStereoBondsFromParity(); * Unneeded explicit hydrogens are removed, if mode includes MODE_REMOVE_HYDROGEN. * The relative orientation of all marked atoms is retained, if mode includes MODE_KEEP_MARKED_ATOM_COORDS. @@ -1830,7 +1833,7 @@ private int[] getShortestConnection(int atom1, int atom2) { int current = 0; int highest = 0; while (current <= highest) { - for (int i=0; i implements DescriptorHandler { protected static final int[] FAILED_OBJECT = new int[0]; public String encode(int[] o) { return calculationFailed(o) ? FAILED_STRING - : new String(new DescriptorEncoder().encode(o)); + : new String(new DescriptorEncoder().encode(o), StandardCharsets.UTF_8); } public int[] decode(String s) { diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/AbstractDescriptorHandlerLongFP.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/AbstractDescriptorHandlerLongFP.java index c59fe8a2..b46eb980 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/AbstractDescriptorHandlerLongFP.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/AbstractDescriptorHandlerLongFP.java @@ -33,16 +33,17 @@ package com.actelion.research.chem.descriptor; -import java.util.Arrays; +import com.actelion.research.chem.SSSearcherWithIndex; -import com.actelion.research.chem.*; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; abstract public class AbstractDescriptorHandlerLongFP implements DescriptorHandler { protected static final long[] FAILED_OBJECT = new long[0]; public String encode(long[] o) { return calculationFailed(o) ? 
FAILED_STRING - : new String(new DescriptorEncoder().encodeLong(o)); + : new String(new DescriptorEncoder().encodeLong(o), StandardCharsets.UTF_8); } public long[] decode(String s) { diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorEncoder.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorEncoder.java index 8f6ebb45..afbedd6d 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorEncoder.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorEncoder.java @@ -33,6 +33,8 @@ package com.actelion.research.chem.descriptor; +import java.nio.charset.StandardCharsets; + /** * DescriptorEncoder encodes int[] based descriptors * into byte arrays that may be used to instantiate Strings @@ -44,10 +46,10 @@ public class DescriptorEncoder { private static final int PAIR_BITS = 4; // CODE Strings must contain highest ASCII character at the end; unused characters: " ' \ ` - private static final byte[] sCode = "0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz".getBytes(); - private static final byte[] sCodeMultipleMin = "!#$%&()*+,-./".getBytes(); - private static final byte[] sCodeMultipleMax = ":;<=>?[]^{|}~".getBytes(); - private static int[] sDecode,sDecodeMultiple; + private static final byte[] sCode = "0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz".getBytes(StandardCharsets.UTF_8); + private static final byte[] sCodeMultipleMin = "!#$%&()*+,-./".getBytes(StandardCharsets.UTF_8); + private static final byte[] sCodeMultipleMax = ":;<=>?[]^{|}~".getBytes(StandardCharsets.UTF_8); + private static volatile int[] sDecode,sDecodeMultiple; private byte[] mBytes; private int mByteIndex,mAvailableBits,mTempData,mByteMask; @@ -55,19 +57,21 @@ public class DescriptorEncoder { public DescriptorEncoder() { if (sDecode == null) { - synchronized(this) { - int len = 1 << BITS; - 
assert len <= sCode.length : "Error in encoding, not enough characters."; - - sDecode = new int[sCode[sCode.length-1]+1]; - for (int i=0; i extends ISimilarityCalculator { static final String FAILED_STRING = "Calculation Failed"; - static final byte[] FAILED_BYTES = FAILED_STRING.getBytes(); + static final byte[] FAILED_BYTES = FAILED_STRING.getBytes(StandardCharsets.UTF_8); public abstract DescriptorInfo getInfo(); public abstract String getVersion(); public abstract String encode(T o); diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorHandlerIntVector.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorHandlerIntVector.java index e7a88dfa..6a0e5b69 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorHandlerIntVector.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorHandlerIntVector.java @@ -34,6 +34,8 @@ package com.actelion.research.chem.descriptor; +import java.nio.charset.StandardCharsets; + /** * This is a descriptor handler, where the input object is an integer array * that typically represents counts of some sort. This class may be used @@ -89,7 +91,7 @@ public String getVersion() { @Override public String encode(int[] d) { - return calculationFailed(d) ? FAILED_STRING : new String(new DescriptorEncoder().encodeIntArray(d)); + return calculationFailed(d) ? 
FAILED_STRING : new String(new DescriptorEncoder().encodeIntArray(d), StandardCharsets.UTF_8); } @Override diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorHandlerSkeletonSpheres.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorHandlerSkeletonSpheres.java index 72538b5e..e1e7d709 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorHandlerSkeletonSpheres.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/descriptor/DescriptorHandlerSkeletonSpheres.java @@ -38,6 +38,7 @@ import com.actelion.research.chem.StereoMolecule; import com.actelion.research.util.BurtleHasher; +import java.nio.charset.StandardCharsets; import java.util.Arrays; public class DescriptorHandlerSkeletonSpheres implements DescriptorHandler { @@ -156,7 +157,7 @@ public byte[] decode(byte[] bytes) { public String encode(byte[] o) { return calculationFailed(o) ? FAILED_STRING - : new String(new DescriptorEncoder().encodeCounts(o)); + : new String(new DescriptorEncoder().encodeCounts(o), StandardCharsets.UTF_8); } public DescriptorInfo getInfo() { diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundTableConstants.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundTableConstants.java index 2ba19b0b..45af4174 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundTableConstants.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundTableConstants.java @@ -186,6 +186,7 @@ public interface CompoundTableConstants { String cColumnPropertyCalculated = "calculated"; //for columns that can be calculated by a task String cColumnPropertyChemistryDisplayMode = "chemistryDisplayMode"; // display mode for molecules, e.g. 
to better recognize query features String cColumnPropertyChemistryTextSize = "chemistryTextSize"; // display text size for molecule atom labels; default is 1.0 + String cColumnPropertySARFirstRGroup = "firstRGroup"; // first R-group number used with core-based SAR on Scaffolds (sub-SAR) String cSuperposeValueReferenceRow = "refRow"; // "reference" or null String cSuperposeAlignValueShape = "shape"; // "reference" or null diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/ExhaustiveFragmentGeneratorBonds.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/ExhaustiveFragmentGeneratorBonds.java index 4e244b72..6f7a4ad0 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/ExhaustiveFragmentGeneratorBonds.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/ExhaustiveFragmentGeneratorBonds.java @@ -198,7 +198,7 @@ private void generateFragments(){ if(addedTotal>totalMaximumCapacity) { if(ELUSIVE) { - System.out.println("ExhaustiveFragmentGeneratorBonds generateFragments() maximum capacity break."); + System.out.println("ExhaustiveFragmentGeneratorBonds generateFragments() maximum capacity (" + totalMaximumCapacity + ") break."); log(neighbours, neighboursTotal, added, addedTotal, neighboursSinceLastAdded); } // Clear all records for this number of bonds. @@ -218,8 +218,9 @@ private void generateFragments(){ } } - if(neighboursSinceLastAdded > LIMIT_NEIGHBOURS_SINCE_LAST_ADDED) { - System.out.println("ExhaustiveFragmentGeneratorBonds generateFragments(). Break for fragments with " + i + " bonds. Generated " + neighboursSinceLastAdded + " neighbours since last add to hash map."); + // if(neighboursSinceLastAdded > LIMIT_NEIGHBOURS_SINCE_LAST_ADDED) { + if(neighboursSinceLastAdded > totalMaximumCapacity) { + System.out.println("ExhaustiveFragmentGeneratorBonds generateFragments(). Break for fragments with " + i + " bonds. 
Generated " + totalMaximumCapacity + " neighbours since last add to hash map."); break; } } diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/RunBondVector2IdCode.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/RunBondVector2IdCode.java index 2a958d30..c7d4127c 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/RunBondVector2IdCode.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/RunBondVector2IdCode.java @@ -104,7 +104,7 @@ public void run() { } catch (Exception e){ e.printStackTrace(); } finally { - System.out.println("RunBondVector2IdCode finally reached."); + // System.out.println("RunBondVector2IdCode finally reached."); endOfRunReached.set(true); } diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/PropertyCalculator.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/prediction/PropertyCalculator.java similarity index 91% rename from src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/PropertyCalculator.java rename to src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/prediction/PropertyCalculator.java index aa083f2a..470df42e 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/PropertyCalculator.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/prediction/PropertyCalculator.java @@ -31,12 +31,9 @@ * */ -package com.actelion.research.chem; +package com.actelion.research.chem.prediction; -import com.actelion.research.chem.prediction.CLogPPredictor; -import com.actelion.research.chem.prediction.ParameterizedStringList; -import com.actelion.research.chem.prediction.PolarSurfaceAreaPredictor; -import com.actelion.research.chem.prediction.SolubilityPredictor; +import com.actelion.research.chem.StereoMolecule; public class PropertyCalculator { diff --git 
a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/prediction/TotalSurfaceAreaPredictor.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/prediction/TotalSurfaceAreaPredictor.java index a9ad0bd3..2580d068 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/prediction/TotalSurfaceAreaPredictor.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/prediction/TotalSurfaceAreaPredictor.java @@ -88,8 +88,7 @@ public class TotalSurfaceAreaPredictor extends PolarSurfaceAreaPredictor { 7.85f, 20.62f, 0.04f, -3.09f, 4.71f, 12.52f, 14.90f, 5.75f, 13.25f, 15.66f, 13.76f, 18.04f, 19.01f, 5.09f, 5.46f, 12.21f, 13.10f, 22.17f, 25.38f, 33.03f }; - /* These are the increments approximating the Schroedinger method (VdW radii, 1.4A probe), - * which creates somewhat smaller values than the Schroedinger method. + /* These are the increments approximating the Schroedinger method (VdW radii, 1.4A probe). * The average error of the PLS prediction (ChEMBL training set) was about 10 square angstrom. private static final float[] cPolarIncrement = { 3.96f, 12.12f, 7.11f, 3.65f, 18.40f, -0.46f, 12.34f, 18.64f, 13.41f, 13.51f, diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/reaction/ReactionEncoder.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/reaction/ReactionEncoder.java index 1ce0dbf7..e9568ebd 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/reaction/ReactionEncoder.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/reaction/ReactionEncoder.java @@ -37,6 +37,7 @@ import com.actelion.research.chem.*; import com.actelion.research.util.ArrayUtils; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; @@ -70,7 +71,7 @@ private ReactionEncoder() * creating idcodes of every reactant and product and * concatenating them in lexical order. 
* If mapping information is available this will be encoded - * in a 2nd string. Otherwise this will be null. + * in a 2nd string. Otherwise this will be an empty string. * Coordinates, if available, will be encoded in a 3rd string. * If there are drawing objects assigned to this reaction * then these are encoded in a 4th string. @@ -352,7 +353,7 @@ public static Reaction decode(String rxnCode, String rxnMapping, String rxnCoord StereoMolecule mol = parser.getCompactMolecule(idcode, coords); if (mapping != null) { - parser.parseMapping(mapping.getBytes()); + parser.parseMapping(mapping.getBytes(StandardCharsets.UTF_8)); } if (isProduct) { @@ -607,23 +608,23 @@ public static StereoMolecule[] decodeMolecules(String s, boolean includeCoords, byte[] rxnCoords = null; int index1 = s.indexOf(OBJECT_DELIMITER); if (index1 == -1) { - rxnCode = s.getBytes(); + rxnCode = s.getBytes(StandardCharsets.UTF_8); } else { - rxnCode = s.substring(0, index1).getBytes(); + rxnCode = s.substring(0, index1).getBytes(StandardCharsets.UTF_8); if (includeMapping || includeCoords) { int index2 = s.indexOf(OBJECT_DELIMITER, index1 + 1); if (index2 == -1) { if (includeMapping) - rxnMapping = s.substring(index1 + 1).getBytes(); + rxnMapping = s.substring(index1 + 1).getBytes(StandardCharsets.UTF_8); } else { if (includeMapping) - rxnMapping = s.substring(index1 + 1, index2).getBytes(); + rxnMapping = s.substring(index1 + 1, index2).getBytes(StandardCharsets.UTF_8); if (includeCoords) { int index3 = s.indexOf(OBJECT_DELIMITER, index2 + 1); if (index3 == -1) { - rxnCoords = s.substring(index2 + 1).getBytes(); + rxnCoords = s.substring(index2 + 1).getBytes(StandardCharsets.UTF_8); } else { - rxnCoords = s.substring(index2 + 1, index3).getBytes(); + rxnCoords = s.substring(index2 + 1, index3).getBytes(StandardCharsets.UTF_8); } } } diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/sar/CoreBasedSARAnalyzer.java 
b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/sar/CoreBasedSARAnalyzer.java new file mode 100644 index 00000000..c3a8ec61 --- /dev/null +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/sar/CoreBasedSARAnalyzer.java @@ -0,0 +1,606 @@ +package com.actelion.research.chem.sar; + +import com.actelion.research.chem.*; +import com.actelion.research.chem.coords.CoordinateInventor; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.TreeMap; + +public class CoreBasedSARAnalyzer { + public static final boolean DISTINGUISH_STEREO_CENTERS = true; + private static final int MAX_R_GROUPS = 16; + + private StereoMolecule mQuery,mFragment; + private SSSearcher mSearcher; + private SSSearcherWithIndex mSearcherWithIndex; + private SARMolecule[] mSARMolecule; // contains info of analyzed molecules, e.g. substituents and corresponding SARScaffold + private TreeMap mScaffoldMap; // map of core structure idcodes to corresponding SARScaffold + private SARScaffoldGroup mScaffoldGroup; + private int[] mPreferredQueryAtomRGroupMatch; + + /** + * This class runs a complete structure-activity-relationship (SAR) analysis from molecules that share + * one or multiple common similar scaffold(s). For one given query substructure this class analyses many + * molecules, whether the query substructure is found and which substituents are connected at which positions. + * If the query substructure is found in (matches) multiple molecules, then the matching substructures + * may be the same in all cases, or they may differ between some molecules. This happens, for instance, + * if the query contains wildcard atoms that match to multiple atom types, or if a bridge bond + * matches to atom chains of different lengths.
+ * The matching substructure of a molecule is called core structure. All molecules that share the same + * core structure are analyzed regarding which core structure atom carries which substituents within + * these molecules. If the substitution is varying within these molecules for a core structure atom, then + * an R-group is assigned to that position. An atom may get multiple R-groups if attachment positions are + * diastereotop or if it sees multiple substituents in some of its molecules. The core structure with numbered + * attached R-groups at all exit vectors with changing substitution is called a scaffold. + * Thus, a specific core structure always gives rise to a specific scaffold structure. + * Thus, all N scaffolds derived from all N core structures caused by one query structure + * form a scaffold group. R-group numbering between all scaffolds within the same scaffold group + * is compatible. This means that R-groups at equivalent exit vectors of two different scaffolds within + * the same group have the same number. Two exit vectors are considered equivalent, if they are connected + * to atoms, which match to the same query structure atom, and if they are diastereotop, e.g. both connected + * with an up-stereo bond when super-positioning their atom coordinates.
+ * For any given molecule a scaffold structure is constructed the following way:
+ * - A substructure search locates all matches of the query structure and selects a preferred match based + * on the substitution pattern. If no match is found, then this molecule is skipped from the analysis.
+ * - All matching atoms and bonds are taken as a 'core' structure. If the query contains bridge bonds, then + * those molecule atoms that match on the bridge bonds also belong to the core structure.
+ * - When the core structure is determined for a molecule, then all remaining atoms of the molecule that + * don't belong to the core structure, are part of substituents.
+ * - For every exit vector of the core structure, i.e. a bond that connects to a substituent atom, the + * substituent structure is determined. A substituent atom may connect back to another exit vector of + * the core structure, causing a ring closure.
+ * - After the analysis of all molecules that share the same core structure, a copy of the + * core structure is created. Then, for every exit vector that has at least two different substituents + * throughout all molecules (e.g. -H and -Me), an R-group is attached to the core structure. + * If all molecules have the same substituent at one core structure position, then no R-group is attached. + * Instead, that substituent itself is attached to the core structure.
+ * - The core structure with attached R-groups and attached constant substituents constitutes the scaffold + * structure for a particular molecule.
+ * - All molecules that match to the same query structure don't necessarily share the same scaffold structure, + * e.g. if a query bridge bond matches on a different chain length or if a wildcard atom matches on a + * different atom type, then the constructed scaffold structure differs in these aspects. Also, the + * number of attached R-groups may be different for different scaffolds, if all molecules belonging + * to one scaffold have no substituent at a position that is substituted on a related scaffold's molecules. + * However, it is assured, that R-group numbering is always the same among the entire scaffold group.
+ * Summary: If molecules match to the same query structure, they belong to the same scaffold group, + * but nonetheless, their assigned scaffold structures may differ concerning atom types, ring sizes, and + * count of attached R-groups. The R-group numbering (R1, R2, ...), however, is compatible, i.e. R-groups + * at equivalent positions have the same number.
+ * @param query substructure with valid atom coordinates that defines one or multiple scaffolds (e.g. via atom lists or bond bridges) + */ + public CoreBasedSARAnalyzer(StereoMolecule query, int moleculeCount) { + mQuery = query; + mQuery.ensureHelperArrays(Molecule.cHelperNeighbours); + + mSARMolecule = new SARMolecule[moleculeCount]; + mScaffoldMap = new TreeMap<>(); + mScaffoldGroup = new SARScaffoldGroup(query); + + mFragment = new StereoMolecule(); // used as molecule buffer + + mPreferredQueryAtomRGroupMatch = new int[MAX_R_GROUPS]; + Arrays.fill(mPreferredQueryAtomRGroupMatch, -1); + } + + /** + * Adds a molecule to the SAR-analyzer:
+ * - determines core structure from the preferred query match
+ * - if this core structure was not seen yet, creates a new scaffold object for this core structure
+ * - creates new SAR-molecule data object with scaffold and substituent information
+ * Use this version of addMolecule() if you don't have pre-calculated fragment fingerprints + * for your molecules available. + * @param mol + * @param index + * @return + */ + public int setMolecule(StereoMolecule mol, int index) { + if (mSearcher == null) { + mSearcher = new SSSearcher(); + mSearcher.setFragment(mQuery); + } + + mSearcher.setMolecule(mol); + int matchCount = mSearcher.findFragmentInMolecule(SSSearcher.cCountModeRigorous, SSSearcher.cDefaultMatchMode); + if (matchCount == 0) + return 0; + + setMolecule(mol, mSearcher, index); + + return matchCount; + } + + /** + * Adds a molecule to the SAR-analyzer:
+ * - determines core structure from the preferred query match
+ * - if this core structure was not seen yet, creates a new scaffold object for this core structure
+ * - creates new SAR-molecule data object with scaffold and substituent information
+ * Use this version of addMolecule() if you have in memory molecules and pre-calculated fragment fingerprints. + * @param mol + * @param ffp + * @param index + * @return + */ + public int setMolecule(StereoMolecule mol, long[] ffp, int index) { + if (mSearcherWithIndex == null) { + mSearcherWithIndex = new SSSearcherWithIndex(); + mSearcherWithIndex.setFragment(mQuery, (long[])null); + } + + mSearcherWithIndex.setMolecule(mol, ffp); + int matchCount = mSearcherWithIndex.findFragmentInMolecule(SSSearcher.cCountModeRigorous, SSSearcher.cDefaultMatchMode); + if (matchCount == 0) + return 0; + + setMolecule(mol, mSearcherWithIndex.getGraphMatcher(), index); + + return matchCount; + } + + /** + * Adds a molecule to the SAR-analyzer:
+ * - determines core structure from the preferred query match
+ * - if this core structure was not seen yet, creates a new scaffold object for this core structure
+ * - creates new SAR-molecule data object with scaffold and substituent information
+ * Use this version of addMolecule() if you have idcodes, coords, and pre-calculated fragment fingerprints + * of your molecules. + * @param idcode + * @param coords + * @param ffp + * @param index + * @return + */ + public int setMolecule(byte[] idcode, byte[] coords, long[] ffp, int index) { + if (mSearcherWithIndex == null) { + mSearcherWithIndex = new SSSearcherWithIndex(); + mSearcherWithIndex.setFragment(mQuery, (long[])null); + } + + mSearcherWithIndex.setMolecule(idcode, ffp); + int matchCount = mSearcherWithIndex.findFragmentInMolecule(SSSearcher.cCountModeRigorous, SSSearcher.cDefaultMatchMode); + if (matchCount == 0) + return 0; + + setMolecule(new IDCodeParser(true).getCompactMolecule(idcode, coords), mSearcherWithIndex.getGraphMatcher(), index); + + return matchCount; + } + + private void setMolecule(StereoMolecule mol, SSSearcher searcher, int index) { + int match = findPreferredMatch(mol, searcher.getMatchList()); + + int[] queryToMolAtom = searcher.getMatchList().get(match); + + // Mark all atoms belonging to core fragment + boolean[] isCoreAtom = new boolean[mol.getAtoms()]; + for (int i=0; i= 129 && atomicNo <= 144) + isCoreAtom[connAtom] = true; + } + } + } + } + + boolean[] isBridgeAtom = searcher.getMatchingBridgeBondAtoms(match); + if (isBridgeAtom != null) + for (int i=0; i MAX_R_GROUPS) { + for (SARMolecule molecule: mSARMolecule) + if (molecule != null + && molecule.getScaffold().getRGroupCount() > MAX_R_GROUPS) + molecule.clear(); + + rGroupCountExceeded = true; + } + + scaffold.addRGroupsToCoreStructure(); + } + + for (SARMolecule molecule: mSARMolecule) + if (molecule != null) + molecule.correctSubstituentRingClosureLabels(); + + return !rGroupCountExceeded; + } + + /** + * Uses a simple strategy to determine the preferred match: + * It preferrers matches that carry substituents at low atom indexes. 
+ * @param mol + * @param matchList + * @return + */ + private int findPreferredMatch(StereoMolecule mol, ArrayList matchList) { + if (matchList.size() == 1) + return 0; + + int bestMatch = -1; + int bestScore = Integer.MIN_VALUE; + int[] bestQueryAtomRGroupMatch = null; + + mol.ensureHelperArrays(Molecule.cHelperNeighbours); + + for (int i=0; i0) + score -= atom * addedValence; + } + } + + // In case of 2-step SAR deconvolutions, where we may have R-groups as substituents, + // we try to choose those matches, which have the same R-groups at + // the same positions. + int[] queryAtomRGroupMatch = getExistingQueryAtomRGroupMatch(match, isUsedAtom, mol); + if (queryAtomRGroupMatch != null) { + int matchingRGroupCount = 0; + for (int k=0; k= 129 && atomicNo <= 144) { + if (rGroupToQueryAtom == null) { + rGroupToQueryAtom = new int[MAX_R_GROUPS]; + Arrays.fill(rGroupToQueryAtom, -1); + } + int rGroupNo = (atomicNo >= 142) ? atomicNo - 142 : atomicNo - 126; // 0-based + rGroupToQueryAtom[rGroupNo] = i; + } + } + } + } + } + return rGroupToQueryAtom; + } + + private void adaptCoreAtomCoordsFromQuery(StereoMolecule query, StereoMolecule core, int[] queryToCoreAtom, boolean hasBridgeAtoms) { + if (!hasBridgeAtoms) { + // just copy query atom coordinates and mark them to be untouched for later coordinate invention + for (int queryAtom = 0; queryAtom backConnectionList = new ArrayList<>(); + + isSubstituentAtom[rootAtom] = true; + isSubstituentAtom[exitAtom] = true; + isSubstituentBond[rootBond] = true; + workAtom[0] = rootAtom; + workAtom[1] = exitAtom; + int current = 1; + int highest = 1; + while (current <= highest) { + for (int i=0; i=0; bond--) + if (mBuffer.getAtomicNo(mBuffer.getBondAtom(0, bond)) == 0 + && mBuffer.getAtomicNo(mBuffer.getBondAtom(1, bond)) == 0) + mBuffer.deleteBond(bond); + + mSubstituent[exitVectorIndex] = new Canonizer(mBuffer, Canonizer.ENCODE_ATOM_CUSTOM_LABELS).getIDCode(); + mSubstituentBondOrder[exitVectorIndex] = mMol.getBondOrder(rootBond); 
+ } + + /** + * In case of substituent atom connecting back to the core structure, the exit vector index is encoded as label + * in the substituent idcode. + * This is needed within the check for varying substituents, because of the label chains with inverted direction + * are recognized as different substituents. Also, otherwise equal chains that connect back to different exit + * vectors are also recognized as being different. + * After the check for varying substituents and once we have a mapping from exit vector index to R-group index, + * we need to exchange the label by a new one with the R-Group index, which should be finally displayed to the user. + */ + protected void correctSubstituentRingClosureLabels() { + if (mSubstituentConnectsBack != null) { + for (int exitVectorIndex = 0; exitVectorIndex mOldToNewMap; + private int mBridgeAtomRGroupCount; + private SARScaffoldGroup mScaffoldGroup; + private ExitVector[] mBridgeAtomExitVector; + private boolean[] mHasSeenSubstituentOnScaffold; + private int[] mSeenBondOrdersOnScaffold; + + protected SARScaffold(StereoMolecule query, StereoMolecule core, int[] coreToQueryAtom, int[] queryToCoreAtom, SARScaffoldGroup scaffoldGroup) { + mQuery = query; + mCore = core; + mCoreToQueryAtom = coreToQueryAtom; + mQueryToCoreAtom = queryToCoreAtom; + mScaffoldGroup = scaffoldGroup; + mOldToNewMap = new TreeMap<>(); + mBridgeAtomRGroupCount = -1; + analyzeAtomBridgeExitVectors(coreToQueryAtom); + mHasSeenSubstituentOnScaffold = new boolean[getExitVectorCount()]; + mSeenBondOrdersOnScaffold = new int[getExitVectorCount()]; + } + + private void analyzeAtomBridgeExitVectors(int[] coreToQueryAtom) { + ArrayList evList = new ArrayList<>(); + for (int atom=0; atom 1) { + hasExitPiBond = true; + break; + } + } + if (hasExitPiBond + && !mol.isAtomStereoCenter(rootAtom)) { + for (int i=0; i 1) + || (exitVector.getIndex() == 1 && mol.getConnBondOrder(rootAtom, i) == 1))) + return connAtom; + } + } + + int count = 0; + for (int i=0; i 
getOldToNewMap() { + return mOldToNewMap; + } + + protected int assignRGroupsToBridgeAtoms(int firstBridgeAtomRGroup) { + if (mBridgeAtomRGroupCount == -1) { + mBridgeAtomRGroupCount = 0; + for (ExitVector exitVector:mBridgeAtomExitVector) + if (exitVector.substituentVaries()) + exitVector.setRGroupNo(++mBridgeAtomRGroupCount + firstBridgeAtomRGroup - 1); + } + return mBridgeAtomRGroupCount; + } + + protected void addRGroupsToCoreStructure() { + mScaffold = new StereoMolecule(mCore); + mScaffold.ensureHelperArrays(Molecule.cHelperNeighbours); + + double coreAVBL = mScaffold.getAverageBondLength(); + + int exitVectorCount = getExitVectorCount(); + boolean[] closureCovered = new boolean[exitVectorCount]; + for (int exitVectorIndex=0; exitVectorIndex attach an R group + ExitVector exitVector = getExitVector(exitVectorIndex); + if (exitVector.substituentVaries()) { + // But don't attach an R-group to one scaffold of a scaffold group, + // if that particular scaffold has never substituents at that position. + if (mHasSeenSubstituentOnScaffold[exitVectorIndex]) { + int rGroupNo = exitVector.getRGroupNo(); + int newAtom = mScaffold.addAtom((rGroupNo<=3) ? 141 + rGroupNo : 125 + rGroupNo); + int bondType = calculateExitVectorCoordsAndBondType(exitVectorIndex, mScaffold.getAtomCoordinates(newAtom)); + mScaffold.addBond(exitVector.getCoreAtom(mQueryToCoreAtom), newAtom, bondType); + } + } + else { // else => attach the non-varying substituent (if it is not null = 'unsubstituted') + if (!closureCovered[exitVectorIndex] && exitVector.getConstantSubstituent() != null) { + StereoMolecule substituent = new IDCodeParser(true).getCompactMolecule(exitVector.getConstantSubstituent()); + + // Substitutions, which connect back to the core fragment are decorated with atomicNo=0 atoms that + // carry a label with the respective exit vector index. 
Here we just copy those connection atoms, + // but mark the exit vector indexes as already attached (closureCovered) to avoid processing from the + // other end again. When all substituents are attached, we convert those labelled atoms into + // proper closure connections. + for (int atom=0; atom 0 && neighbourRank[index-1] > rank) { + neighbourRank[index] = neighbourRank[index-1]; + neighbourBond[index] = neighbourBond[index-1]; + neighbourAngle[index] = neighbourAngle[index-1]; + index--; + } + + neighbourRank[index] = rank; + neighbourBond[index] = connBond; + neighbourAngle[index] = mol.getBondAngle(rootAtom, connAtom); + + totalNeighbourCount++; + } + + if (totalNeighbourCount < 3 || totalNeighbourCount > 4) + return -1; + + int stereoType = (mol.getBondType(stereoBond) == Molecule.cBondTypeUp) ? 2 : 1; + + // Here we have one of the following neighbour counts in addition to the defined exitAtom: + // A: 1 neighbour that exists in core structure; 1 additional exit atom + // B: 2 neighbours that exists in core structure; no additional exit atom + // C: 2 neighbours that exists in core structure; 1 additional exit atom + // D: 3 neighbours that exists in core structure; no additional exit atom + + // Case A and B: + // 3 non-H neighbours at stereo center. + // Thus, we don't need to change up/down bond type when shifting stereo bond to other neighbour. + + // Cases C and D: + // 4 non-H neighbours at stereo center. + // Thus, we need to invert up/down bond type when shifting stereo bond to direct neighbour bond. 
+ if (mol.getConnAtoms(rootAtom) == 4) { + if (otherExitAtomFound && stereoBond == neighbourBond[3]) { + if (areDirectNeighbours(neighbourAngle, 2, 3)) + stereoType = 3 - stereoType; + } + else if (stereoBond != exitBond) { + // if the stereo bond is one of the core bonds and if the exit bond + int stereoBondIndex = -1; + for (int i=0; i angle1) && (angle[i] < angle2)) + count++; + return count != 1; + } + + /** + * If a substituent can be attached to a core structure in two distinguishable ways regarding + * stereo configuration, then this method calculates in a reproducible way a topicity (0 or 1) + * reflecting a given up/down bond stereo type and the bond angles from the stereo center to + * all neighbour atoms. The bond angle array is expected to contain sorted core neighbours first, + * followed by one or two exit vector angles. The last exit vector is considered to carry the + * stereo bond. If in reality the stereo bond is a different one, then the calling method is + * responsible to compensate for a stereo bond shift and/or to compensate for the second exit + * vector by potentially inverting stereoType. + * Topicity is defined as follows:
+ * - 1 neighbour atom in core structure and 2 exit atoms:
+ * If walking from first atom (lowest relevant index) via root atom to first exit atom + * making a left turn, then an up-bond connecting second exit atom gives topicity=1
+ * - 2 neighbour atoms in core structure and 1 or 2 exit atoms:
+ * If walking from first atom (lowest relevant index) via root atom to second atom + * making a left turn, then an up-bond connecting first/only exit atom gives topicity=1
+ * - 3 neighbour atoms in core structure and one exit atom:
+ * If neighbours 1,2,3 are in counter-clockwise order and fourth neighbour is connected + * with an up-bond, then topicity=1
+ * @param angle bond angles at stereo center sorted by relevant atom index + * @param coreNeighbourCount number of bonds at stereo center that have a counterpart in the core structure + * @param totalNeighbourCount number of bonds at stereo center including a second exit atom + * @param stereoType 1 (down) or 2 (up); corrected if original stereo bond is not the exit bond or/and second exit atom exists + * @return topicity 0 or 1 + */ + private int calculateTHTopicity(double[] angle, int coreNeighbourCount, int totalNeighbourCount, int stereoType) { + for (int i=1; i Math.PI); + return leftTurn ^ (stereoType == 2) ? 1 : 0; + } + + boolean clockwise = (angle[1] > angle[2]); + return clockwise ^ (stereoType == 2) ? 1 : 0; + } + + private int calculateEZTopicity(StereoMolecule mol, int rootAtom, int exitAtom, int[] molToCoreAtom) { + if (mol.getAtomPi(rootAtom) != 1) + return -1; + + if (mol.getBondOrder(mol.getBond(rootAtom, exitAtom)) != 1) + return -1; + + int doubleBond = -1; + int rearDBAtom = -1; + for (int i=0; i candidate) { + oppositeAtom = candidate; + oppositeAngle = mol.getBondAngle(connAtom, rearDBAtom); + } + } + } + + if (oppositeAtom == Integer.MAX_VALUE) + return -1; + + double angleDif2 = Molecule.getAngleDif(oppositeAngle, dbAngle); + + return (angleDif1 < 0) ^ (angleDif2 < 0) ? 0 : 1; // E:0 Z:1 + } + + /** + * If coreRootAtom matches a stereo center in the molecule and if coreConnAtom is one of coreRootAtom's + * neighbours in the core structure, then this method returns for this neighbour that atom index, which + * is used to determine the topicity for any exit vector (neighbours in mol, which are not part of the + * core structure). Typically, we use query structure atom indexes for this, i.e. the relevant atom index + * of a coreConnAtom is the index of its correscponding atom in the query structure. If, however, + * coreConnAtom is an atom of a matching bridge bond, then there is no corresponding query structure atom. 
+ * In that case we walk along the bridge bond atoms in the core structure until we hit an atom that exists + * in the query, which is the remote bridge bond atom, whose index is then returned. + * @param coreRootAtom + * @param coreConnAtom + * @return + */ + private int getTopicityRelevantAtomIndex(int coreRootAtom, int coreConnAtom) { + int queryRoot = mCoreToQueryAtom[coreRootAtom]; + + // If the stereo center itself is not part of the query, then it is within a bridge bond + // and does not exist in all scaffolds of the scaffold group. + // In this case we use atom index of the core rather than the query as reference. + if (queryRoot == -1) + return coreConnAtom; + + int queryAtom = mCoreToQueryAtom[coreConnAtom]; + if (queryAtom != -1) + return queryAtom; + + // If the stereo center neighbour in the core does not exist in the query, then it is part of + // a bridge bond. In this case we have to find that stereo center neighbour in the query + // that is connected with that bridge bond in the core, which copntains coreConnAtom. + // For that we build a graph from the core root atom adding only atoms that don't exist in + // the query until we hit a query core neighbour, which we return. + + int[] bridgeNeighbour = new int[mQuery.getConnAtoms(queryRoot)]; + int bridgeNeighbourCount = 0; + for (int i=0; i + * We define: If we have increasing atom indexes of query bonds in clockwise order, + * then topicity=0 is associated with an UP-bond and topicity=1 is associated with a DOWN-bond. + * @param exitVectorIndex + * @param coords receives suggested coordinates for first exit atom + * @return + */ + private int calculateExitVectorCoordsAndBondType(int exitVectorIndex, Coordinates coords) { + if ((mSeenBondOrdersOnScaffold[exitVectorIndex] & 2) == 0) + return ((mSeenBondOrdersOnScaffold[exitVectorIndex] & 4) == 0) ? 
3 : 2; + + ExitVector exitVector = getExitVector(exitVectorIndex); + int rootAtom = exitVector.getCoreAtom(mQueryToCoreAtom); + + int[] neighbour = new int[3]; + double[] angle = new double[3]; + + int coreNeighbourCount = 0; + int piBondSum = 0; + for (int i=0; i 0 && neighbour[index-1] > neighbourAtom) { + neighbour[index] = neighbour[index-1]; + angle[index] = angle[index-1]; + index--; + } + + neighbour[index] = neighbourAtom; + angle[index] = mScaffold.getBondAngle(rootAtom, connAtom); + piBondSum += mScaffold.getConnBondOrder(rootAtom, i) - 1; + coreNeighbourCount++; + } + } + + if (piBondSum != 0) { + if (coreNeighbourCount == 1) + calculateSP2ExitVectorCoords(rootAtom, piBondSum, exitVector.getTopicity(), angle, coords); + else + calculateSP3ExitVectorCoords(rootAtom, Arrays.copyOf(angle, coreNeighbourCount), coords); + + // we assume that we don't have a stereo center with double bonds, e.g. at S or P + return Molecule.cBondTypeSingle; + } + + calculateSP3ExitVectorCoords(rootAtom, Arrays.copyOf(angle, coreNeighbourCount), coords); + + if (exitVector.getTopicity() == -1) + return Molecule.cBondTypeSingle; + + angle[coreNeighbourCount] = Molecule.getAngle(mScaffold.getAtomX(rootAtom), mScaffold.getAtomY(rootAtom), coords.x, coords.y); + + int totalNeighbourCount = coreNeighbourCount + 1; + + if (coreNeighbourCount == 1) { + angle[coreNeighbourCount+1] = angle[coreNeighbourCount] + Math.PI * 2 / 3; + totalNeighbourCount++; + } + + int topicity = calculateTHTopicity(angle, coreNeighbourCount, totalNeighbourCount, 1); + return (topicity == -1) ? Molecule.cBondTypeSingle : (topicity == exitVector.getTopicity()) ? Molecule.cBondTypeDown : Molecule.cBondTypeUp; + } + + /** + * If there are at least two existing neighbours (angle.length >= 2), this method + * places the new neighbour at a position furthest away from any existing neighbour + * using the scaffolds average bond length. 
If there is only one neighbour, the new + * neighbour will be placed with a bond angle 120 degrees larger. + * @param atom + * @param angle + * @param coords + */ + private void calculateSP3ExitVectorCoords(int atom, double[] angle, Coordinates coords) { + double exitAngle = Math.PI * 2 / 3; + + if (angle.length >= 2) { + Arrays.sort(angle); + double largestDiff = -1.0; + for (int i=0; i candidate) { + oppositeAtom = candidate; + oppositeAngle = mScaffold.getBondAngle(rearDBAtom, connAtom); + } + } + } + + double angleDif = Molecule.getAngleDif(oppositeAngle, dbAngle); + exitAngle = angle[0] + ((angleDif < 0) ^ (topicity == 1) ? 0.6667 : 1.3333) * Math.PI; + } + + double avbl = mScaffold.getAverageBondLength(); + coords.x = mScaffold.getAtomX(atom) + avbl * Math.sin(exitAngle); + coords.y = mScaffold.getAtomY(atom) + avbl * Math.cos(exitAngle); + } +} diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/sar/SARScaffoldGroup.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/sar/SARScaffoldGroup.java new file mode 100644 index 00000000..ab61d661 --- /dev/null +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/sar/SARScaffoldGroup.java @@ -0,0 +1,103 @@ +package com.actelion.research.chem.sar; + +import com.actelion.research.chem.Molecule; +import com.actelion.research.chem.StereoMolecule; + +import java.util.ArrayList; + +/** + * A scaffold group comprises all scaffolds that arise from a match of the same query substructure in multiple + * processed molecules. A scaffold group may contain more than one scaffold, if the substructure contains wild card + * elements like atom lists, or multiple allowed bond orders. A special case are bridge bonds, which cause a query + * match to contain more atoms than the query itself. Interestingly, these atoms may also carry R-groups, which needs + * to be handled differently from the R-groups on the core atoms. 
Core atoms are those atoms for which an associated + * atom exists in the query structure. R-groups (exit vectors) on core atom are numbered consistently within all + * scaffolds that belong to the same scaffold group, i.e. that were detected from the same query structure. + */ +public class SARScaffoldGroup { + private int mRGroupCount; + private ExitVector[] mExitVector; + private ArrayList mScaffoldList; + + protected SARScaffoldGroup(StereoMolecule query) { + super(); + mRGroupCount = -1; + analyzeExitVectors(query); + mScaffoldList = new ArrayList<>(); + } + + public void addScaffold(SARScaffold scaffold) { + mScaffoldList.add(scaffold); + } + + public ArrayList getScaffoldList() { + return mScaffoldList; + } + + private void analyzeExitVectors(StereoMolecule query) { + ArrayList evList = new ArrayList<>(); + for (int atom=0; atom= two exit vectors (with both one and two connAtoms in the query) + // we assume that we can distinuish the exit vectors by topicity (any stereo criteria) + int topicity = (exitVectorCount >= 2) ? 
i : -1; + evList.add(new ExitVector(atom, true, i, topicity)); + } + } + } + mExitVector = evList.toArray(new ExitVector[0]); + } + + protected int getExitVectorCount() { + return mExitVector.length; + } + + /** + * Find correct exit vector index defining the core atom and for it either the exit vectors topicity + * (if exit vectors are stereo-heterotop) or just an index (if exit vectors are homotop) + * @param queryAtom respective atom index of query + * @param connIndex 0-based exo-query neighbour index (not used if topicity != -1 and neighbours are stereotop + * @param topicity -1, if exit vector neighbours are homotop, otherise 0 or 1 + * @return index into list of all exit vectors of scaffold + */ + protected int getExitVectorIndex(int queryAtom, int connIndex, int topicity) { + for (int i=0; i { @@ -107,8 +108,7 @@ public int getHeight() { private void init() { mImageNormal = mArea.getUIHelper().createImage("editorButtons.png"); - if (LookAndFeelHelper.isDarkLookAndFeel()) - HiDPIHelper.adaptForLookAndFeel(mImageNormal); + HiDPIIcon.adaptForLookAndFeel(mImageNormal); mImageDisabled = mArea.getUIHelper().createImage("editorButtons.png"); HiDPIHelper.disableImage(mImageDisabled); diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/ChemistryHelper.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/share/gui/ChemistryGeometryHelper.java similarity index 94% rename from src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/ChemistryHelper.java rename to src/com/actelion/research/gwt/chemlib/com/actelion/research/share/gui/ChemistryGeometryHelper.java index c643716a..dcceb26b 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/ChemistryHelper.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/share/gui/ChemistryGeometryHelper.java @@ -31,25 +31,25 @@ * */ -package com.actelion.research.chem; +package com.actelion.research.share.gui; +import 
com.actelion.research.chem.AbstractDepictor; +import com.actelion.research.chem.ExtendedMolecule; +import com.actelion.research.chem.Molecule; +import com.actelion.research.chem.StereoMolecule; import com.actelion.research.chem.reaction.Reaction; import com.actelion.research.gui.generic.GenericRectangle; import java.awt.*; -public class ChemistryHelper +public class ChemistryGeometryHelper { public static final int REACTION_TYPE_NOMOLS = 0; public static final int REACTION_TYPE_NOPRODUCTS = 1; public static final int REACTION_TYPE_REACTANTS = 2; public static final int REACTION_TYPE_NORMAL = 3; - private ChemistryHelper() - { - } - public static int getReactionType(Reaction r) { int mols = r.getMolecules(); @@ -160,7 +160,7 @@ public static void setAverageBondLength(Reaction rxn, double bndlen) ExtendedMolecule m = rxn.getMolecule(fragment); dx = m.getAverageBondLength(); double scale = bndlen / dx; - ChemistryHelper.transformMolecule(m,0,0,scale); + ChemistryGeometryHelper.transformMolecule(m,0,0,scale); } } @@ -311,8 +311,8 @@ public static void scaleIntoF(Reaction reaction, double x, double y, double widt if (rr != null) { double cx = -rr.x; double cy = -rr.y; - ChemistryHelper.transformReaction(reaction, cx, cy, 1); - rr = ChemistryHelper.getBoundingRect(reaction, true); + ChemistryGeometryHelper.transformReaction(reaction, cx, cy, 1); + rr = ChemistryGeometryHelper.getBoundingRect(reaction, true); double sumWidth = rr.getWidth(), sumHeight = rr.getHeight(); @@ -324,7 +324,7 @@ public static void scaleIntoF(Reaction reaction, double x, double y, double widt scale = scV; // // System.out.print("Scaleinto scale %s\n",scale); - ChemistryHelper.transformReaction(reaction, 0, 0, scH); + ChemistryGeometryHelper.transformReaction(reaction, 0, 0, scH); } } @@ -343,7 +343,7 @@ public static void scaleInto(Reaction reaction, double x, double y, double width ExtendedMolecule m = reaction.getMolecule(i); if (m.getAllAtoms() > 1) { - GenericRectangle r = 
ChemistryHelper.getBoundingRect(m); + GenericRectangle r = ChemistryGeometryHelper.getBoundingRect(m); if (r != null) { // // System.out.print("MoleculeID %s bounds: %s\n",System.identityHashCode(m),r); sumHeight += r.getHeight(); @@ -506,13 +506,13 @@ public static void arrangeReaction(Reaction rxn,Dimension size) for (int fragment=0; fragment list) { return res; } + public final static byte [] toByteArray(List li) { + byte[] res = new byte[li.size()]; + int index = 0; + Iterator iter = li.iterator(); + while(iter.hasNext()) { + res[index++] = iter.next(); + } + return res; + } + public final static int indexOf(Object[] array, Object obj) { for (int i = 0; i < array.length; i++) { if(array[i].equals(obj)) return i; diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/EncoderFloatingPointNumbers.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/EncoderFloatingPointNumbers.java index 90130c14..d0075038 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/EncoderFloatingPointNumbers.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/EncoderFloatingPointNumbers.java @@ -4,6 +4,8 @@ import com.actelion.research.util.datamodel.DoubleArray; import com.actelion.research.util.datamodel.IntArray; +import java.nio.charset.StandardCharsets; + /* * Copyright (c) 1997 - 2016 * Actelion Pharmaceuticals Ltd. 
@@ -285,7 +287,7 @@ private String encode() { int [] data = finalizeAndGet(); - String strData = new String(new DescriptorEncoder().encode(data)); + String strData = new String(new DescriptorEncoder().encode(data), StandardCharsets.UTF_8); return strData; } diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/EncoderIntegerNumbers.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/EncoderIntegerNumbers.java index 084191e9..14e93240 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/EncoderIntegerNumbers.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/EncoderIntegerNumbers.java @@ -2,9 +2,9 @@ import com.actelion.research.calc.Logarithm; import com.actelion.research.chem.descriptor.DescriptorEncoder; -import com.actelion.research.util.datamodel.DoubleArray; import com.actelion.research.util.datamodel.IntArray; +import java.nio.charset.StandardCharsets; import java.util.Random; /** @@ -170,7 +170,7 @@ private String encode() { int [] data = finalizeAndGet(); - String strData = new String(new DescriptorEncoder().encode(data)); + String strData = new String(new DescriptorEncoder().encode(data), StandardCharsets.UTF_8); return strData; } diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/ListUtils.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/ListUtils.java index 7287368a..6322201d 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/ListUtils.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/ListUtils.java @@ -52,6 +52,19 @@ public static String toString(List li){ return sb.toString(); } + public static String toStringInteger(List li){ + StringBuilder sb = new StringBuilder(); + + for (int i = 0; i < li.size(); i++) { + sb.append(li.get(i)); + if(i createIndexList(int n){ List liIndex = new ArrayList<>(n); diff --git 
a/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/datamodel/IntArray.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/datamodel/IntArray.java index 05c25f78..2ccee464 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/datamodel/IntArray.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/datamodel/IntArray.java @@ -483,7 +483,7 @@ public static boolean equals(int [] a, int [] b){ } public static List toList(int [] a) { - List li = new ArrayList(a.length); + List li = new ArrayList<>(a.length); for (int i = 0; i < a.length; i++) { li.add(a[i]); diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/datamodel/table/TableModelString.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/datamodel/table/TableModelString.java index b6dc8bef..0e1a7566 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/datamodel/table/TableModelString.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/util/datamodel/table/TableModelString.java @@ -398,22 +398,23 @@ public void write(File fiTxt, String rowName) throws IOException { List liData = liliData.get(i); - for (int j = 0; j < liData.size(); j++) { - bw.write(liData.get(j)); - if(iA dedicated (systematic, biased or random) torsion set strategy delivers collision-free torsion sets, i.e. conformers. *

* For generating conformers in multiple threads, every thread needs its own ConformerGenerator instance. - * If they use a RigidFragmentCache, then the cache is shared among all ConformerGenerators. + * If they use a RigidFragmentCache, then the cache is shared among all ConformerGenerators.
+ * Important: Input molecules should contain absolute stereo centers. If they contain undefined or ESR type '&' or 'or' + * stereo centers, then a ConformerGenerator randomly takes one of the possible stereo isomers and generates conformers + * for that. If you want conformers for all possible stereo isomers of a molecule with non-absolute stereo centers, + * you should use a StereoIsomerEnumerator to produce all possible stereo isomers and then produce conformers for every + * one of them. If half of a set of stereo isomers consists of the enantiomers of the other half, then it is advisable + * to generate conformers for one half only and to generate the second half by just mirroring the first half's coordinates. + * To do that, use option skipEnantiomers==true and create a mirrored set of conformers, if isSkippingEnantiomers() of the + * StereoIsomerEnumerator returns true. */ public class ConformerGenerator { public static final int STRATEGY_LIKELY_SYSTEMATIC = 1; @@ -93,7 +99,8 @@ public class ConformerGenerator { private ConformerSetDiagnostics mDiagnostics; /** - * Adds explicit hydrogen atoms where they are implicit by filling valences + * Assuming that the given molecule has 2D-coordinates, this method + * converts all implicit hydrogen atoms into explicit ones by filling valences * and adapting for atom charges. New hydrogen atoms receive new 2D-coordinates * by equally locating them between those two neighbors with the widest angle between * their bonds. Any stereo configurations deducible from 2D-coordinates are retained. 
diff --git a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/RigidFragmentCache.java b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/RigidFragmentCache.java index 6f9c8c2d..7b282180 100644 --- a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/RigidFragmentCache.java +++ b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/RigidFragmentCache.java @@ -37,6 +37,7 @@ import com.actelion.research.util.DoubleFormat; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.TreeSet; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ConcurrentHashMap; @@ -151,7 +152,7 @@ private void loadCache(BufferedReader br) throws Exception { for (int i=0; i ATOM_FLAT_RING_BREAKOUT_STRAIN) { - if (tryEscapeFromFlatRingTrap(conformer, atom)) { + if (atomStrain > ATOM_FLAT_RING_BREAKOUT_STRAIN + && tryEscapeFromFlatRingTrap(conformer, atom)) atomCount++; - continue; - } - } - if (atomStrain > ATOM_CAGE_BREAKOUT_STRAIN) { - if (mDWWriter != null) { - try { - writeStrains(conformer, null, "escapeCage", atomStrain, Double.NaN); + } + + if (atomCount == 0) { + int neighbourCount = 16; + for (int atom=0; atom mMol.getAllConnAtoms(atom) + && conformer.getAtomStrain(atom) > maxCageBreakoutStrain(atom)) + neighbourCount = mMol.getAllConnAtoms(atom); + + if (neighbourCount != 16) { + for (int atom=0; atom maxCageBreakoutStrain(atom)) { +//System.out.println("escape "+neighbourCount+" neighbours"); +//System.out.print("strains: "); for (int i=0; i * - has only one neighbour
diff --git a/src/com/actelion/research/gwt/core/JSMoleculeProperties.java b/src/com/actelion/research/gwt/core/JSMoleculeProperties.java index 4e62003e..a80d54fa 100644 --- a/src/com/actelion/research/gwt/core/JSMoleculeProperties.java +++ b/src/com/actelion/research/gwt/core/JSMoleculeProperties.java @@ -1,6 +1,6 @@ package com.actelion.research.gwt.core; -import com.actelion.research.chem.PropertyCalculator; +import com.actelion.research.chem.prediction.PropertyCalculator; import com.actelion.research.gwt.minimal.JSMolecule; import com.google.gwt.core.client.JavaScriptObject; import jsinterop.annotations.*; diff --git a/src/com/actelion/research/gwt/gui/editor/GWTGeomFactory.java b/src/com/actelion/research/gwt/gui/editor/GWTGeomFactory.java index 0ca10c92..b76ecd89 100644 --- a/src/com/actelion/research/gwt/gui/editor/GWTGeomFactory.java +++ b/src/com/actelion/research/gwt/gui/editor/GWTGeomFactory.java @@ -1,11 +1,11 @@ package com.actelion.research.gwt.gui.editor; -import com.actelion.research.chem.ChemistryHelper; import com.actelion.research.chem.StereoMolecule; import com.actelion.research.gui.generic.GenericRectangle; import com.actelion.research.gwt.gui.editor.actions.dialogs.AtomPropertiesDialog; import com.actelion.research.gwt.gui.editor.actions.dialogs.AtomQueryFeaturesDialog; import com.actelion.research.gwt.gui.editor.actions.dialogs.BondQueryFeaturesDialog; +import com.actelion.research.share.gui.ChemistryGeometryHelper; import com.actelion.research.share.gui.DrawConfig; import com.actelion.research.share.gui.editor.chem.IArrow; import com.actelion.research.share.gui.editor.dialogs.IAtomPropertiesDialog; @@ -41,7 +41,7 @@ public IAtomPropertiesDialog createAtomPropertiesDialog(StereoMolecule m, int at @Override public GenericRectangle getBoundingRect(StereoMolecule m) { - return ChemistryHelper.getBoundingRect(m); + return ChemistryGeometryHelper.getBoundingRect(m); } public IKeyCode getDeleteKey() { diff --git 
a/src/com/actelion/research/gwt/minimal/JSMolecule.java b/src/com/actelion/research/gwt/minimal/JSMolecule.java index 64f8446b..be61a958 100644 --- a/src/com/actelion/research/gwt/minimal/JSMolecule.java +++ b/src/com/actelion/research/gwt/minimal/JSMolecule.java @@ -496,6 +496,7 @@ public StereoMolecule getStereoMolecule() { // considering also cBondQFBondTypes // in query + public static final int cHelperAll = 0x00FF; public static final int cHelperNone = 0x0000; public static final int cHelperBitNeighbours = 0x0001; public static final int cHelperBitRingsSimple = 0x0002; // small rings only, no aromaticity, no diff --git a/types.d.ts b/types.d.ts index ff18d364..b8414cd6 100644 --- a/types.d.ts +++ b/types.d.ts @@ -291,6 +291,7 @@ export declare class Molecule { static cBondQFNotAromatic: number; static cBondQFMatchFormalOrder: number; + static cHelperAll: number; static cHelperNone: number; static cHelperBitNeighbours: number; static cHelperBitRingsSimple: number;