5 from ost
import LogWarning, LogScript, LogVerbose, LogDebug
9 """ Scorer to compute various small molecule ligand (non polymer) scores.
14 - Python modules `numpy` and `networkx` must be available
15 (e.g. use ``pip install numpy networkx``)
17 :class:`LigandScorer` is an abstract base class dealing with all the setup,
18 data storage, enumerating ligand symmetries and target/model ligand
19 matching/assignment. But actual score computation is delegated to child
22 At the moment, two such classes are available:
24 * :class:`ost.mol.alg.ligand_scoring_lddtpli.LDDTPLIScorer`
25 that assesses the conservation of protein-ligand
27 * :class:`ost.mol.alg.ligand_scoring_scrmsd.SCRMSDScorer`
28 that computes a binding-site superposed, symmetry-corrected RMSD.
30 All versus all scores are available through the lazily computed
31 :attr:`score_matrix`. However, many things can go wrong... be it even
32 something as simple as two ligands not matching. Error states therefore
33 encode scoring issues. An Issue for a particular ligand is indicated by a
34 non-zero state in :attr:`model_ligand_states`/:attr:`target_ligand_states`.
35 This invalidates pairwise scores of such a ligand with all other ligands.
36 This and other issues in pairwise score computation are reported in
37 :attr:`state_matrix` which has the same size as :attr:`score_matrix`.
38 Only if the respective location is 0, a valid pairwise score can be
39 expected. The states and their meaning can be explored with code::
41 for state_code, (short_desc, desc) in scorer_obj.state_decoding.items():
46 A common use case is to derive a one-to-one mapping between ligands in
47 the model and the target for which :class:`LigandScorer` provides an
48 automated assignment procedure.
49 By default, only exact matches between target and model ligands are
50 considered. This is a problem when the target only contains a subset
51 of the expected atoms (for instance if atoms are missing in an
52 experimental structure, which often happens in the PDB). With
53 `substructure_match=True`, complete model ligands can be scored against
54 partial target ligands. One problem with this approach is that it is
55 very easy to find good matches to small, irrelevant ligands like EDO, CO2
56 or GOL. The assignment algorithm therefore considers the coverage,
57 expressed as the fraction of atoms of the model ligand atoms covered in the
58 target. Higher coverage matches are prioritized, but a match with a better
59 score will be preferred if it falls within a window of `coverage_delta`
60 (by default 0.2) of a worse-scoring match. As a result, for instance,
61 with a delta of 0.2, a low-score match with coverage 0.96 would be
62 preferred over a high-score match with coverage 0.70.
66 :class:`LigandScorer` generally assumes that the
67 :attr:`~ost.mol.ResidueHandle.is_ligand` property is properly set on all
68 the ligand atoms, and only ligand atoms. This is typically the case for
69 entities loaded from mmCIF (tested with mmCIF files from the PDB and
70 SWISS-MODEL). Legacy PDB files must contain `HET` headers (which is usually
71 the case for files downloaded from the PDB but not elsewhere).
73 The class doesn't perform any cleanup of the provided structures.
74 It is up to the caller to ensure that the data is clean and suitable for
75 scoring. :ref:`Molck <molck>` should be used with extra
76 care, as many of the options (such as `rm_non_std` or `map_nonstd_res`) can
77 cause ligands to be removed from the structure. If cleanup with Molck is
78 needed, ligands should be kept aside and passed separately. Non-ligand
79 residues should be valid compounds with atom names following the naming
80 conventions of the component dictionary. Non-standard residues are
81 acceptable, and if the model contains a standard residue at that position,
82 only atoms with matching names will be considered.
84 Unlike most of OpenStructure, this class does not assume that the ligands
85 (either in the model or the target) are part of the PDB component
86 dictionary. They may have arbitrary residue names. Residue names do not
87 have to match between the model and the target. Matching is based on
88 the calculation of isomorphisms which depend on the atom element name and
89 atom connectivity (bond order is ignored).
90 It is up to the caller to ensure that the connectivity of atoms is properly
91 set before passing any ligands to this class. Ligands with improper
92 connectivity will lead to bogus results.
94 Note, however, that atom names should be unique within a residue (ie two
95 distinct atoms cannot have the same atom name).
97 This only applies to the ligand. The rest of the model and target
98 structures (protein, nucleic acids) must still follow the usual rules and
99 contain only residues from the compound library.
101 Although it isn't a requirement, hydrogen atoms should be removed from the
102 structures. Here is an example code snippet that will perform a reasonable
103 cleanup. Keep in mind that this is most likely not going to work as
104 expected with entities loaded from PDB files, as the `is_ligand` flag is
105 probably not set properly.
107 Here is an example of how to use setup a scorer code::
109 from ost.mol.alg.ligand_scoring_scrmsd import SCRMSDScorer
110 from ost.mol.alg import Molck, MolckSettings
113 # Structure model in PDB format, containing the receptor only
114 model = io.LoadPDB("path_to_model.pdb")
115 # Ligand model as SDF file
116 model_ligand = io.LoadEntity("path_to_ligand.sdf", format="sdf")
117 # Target loaded from mmCIF, containing the ligand
118 target = io.LoadMMCIF("path_to_target.cif")
120 # Cleanup a copy of the structures
121 cleaned_model = model.Copy()
122 cleaned_target = target.Copy()
123 molck_settings = MolckSettings(rm_unk_atoms=True,
127 rm_zero_occ_atoms=False,
129 map_nonstd_res=False,
131 Molck(cleaned_model, conop.GetDefaultLib(), molck_settings)
132 Molck(cleaned_target, conop.GetDefaultLib(), molck_settings)
134 # Setup scorer object and compute lDDT-PLI
135 model_ligands = [model_ligand.Select("ele != H")]
136 sc = SCRMSDScorer(cleaned_model, cleaned_target, model_ligands)
138 # Perform assignment and read respective scores
139 for lig_pair in sc.assignment:
140 trg_lig = sc.target_ligands[lig_pair[0]]
141 mdl_lig = sc.model_ligands[lig_pair[1]]
142 score = sc.score_matrix[lig_pair[0], lig_pair[1]]
143 print(f"Score for {trg_lig} and {mdl_lig}: {score}")
145 :param model: Model structure - a deep copy is available as :attr:`model`.
146 No additional processing (ie. Molck), checks,
147 stereochemistry checks or sanitization is performed on the
148 input. Hydrogen atoms are kept.
149 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
150 :param target: Target structure - a deep copy is available as
151 :attr:`target`. No additional processing (ie. Molck), checks
152 or sanitization is performed on the input. Hydrogen atoms are
154 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
155 :param model_ligands: Model ligands, as a list of
156 :class:`~ost.mol.ResidueHandle` belonging to the model
157 entity. Can be instantiated with either a :class:list
158 of :class:`~ost.mol.ResidueHandle`/
159 :class:`ost.mol.ResidueView` or of
160 :class:`ost.mol.EntityHandle`/
161 :class:`ost.mol.EntityView`.
162 If `None`, ligands will be extracted based on the
163 :attr:`~ost.mol.ResidueHandle.is_ligand` flag (this is
164 normally set properly in entities loaded from mmCIF).
165 :type model_ligands: :class:`list`
166 :param target_ligands: Target ligands, as a list of
167 :class:`~ost.mol.ResidueHandle` belonging to the
168 target entity. Can be instantiated either a
169 :class:list of :class:`~ost.mol.ResidueHandle`/
170 :class:`ost.mol.ResidueView` or of
171 :class:`ost.mol.EntityHandle`/
172 :class:`ost.mol.EntityView` containing a single
173 residue each. If `None`, ligands will be extracted
174 based on the :attr:`~ost.mol.ResidueHandle.is_ligand`
175 flag (this is normally set properly in entities
177 :type target_ligands: :class:`list`
178 :param resnum_alignments: Whether alignments between chemically equivalent
179 chains in *model* and *target* can be computed
180 based on residue numbers. This can be assumed in
181 benchmarking setups such as CAMEO/CASP.
182 :type resnum_alignments: :class:`bool`
183 :param rename_ligand_chain: If a residue with the same chain name and
184 residue number than an explicitly passed model
185 or target ligand exits in the structure,
186 and `rename_ligand_chain` is False, a
187 RuntimeError will be raised. If
188 `rename_ligand_chain` is True, the ligand will
189 be moved to a new chain instead, and the move
190 will be logged to the console with SCRIPT
192 :type rename_ligand_chain: :class:`bool`
193 :param substructure_match: Set this to True to allow incomplete (i.e.
194 partially resolved) target ligands.
195 :type substructure_match: :class:`bool`
196 :param coverage_delta: the coverage delta for partial ligand assignment.
197 :type coverage_delta: :class:`float`
198 :param max_symmetries: If more than that many isomorphisms exist for
199 a target-ligand pair, it will be ignored and reported
201 :type max_symmetries: :class:`int`
204 def __init__(self, model, target, model_ligands=None, target_ligands=None,
205 resnum_alignments=False, rename_ligand_chain=False,
206 substructure_match=False, coverage_delta=0.2,
210 self.
modelmodel = mol.CreateEntityFromView(model,
False)
212 self.
modelmodel = model.Copy()
214 raise RuntimeError(
"model must be of type EntityView/EntityHandle")
217 self.
targettarget = mol.CreateEntityFromView(target,
False)
219 self.
targettarget = target.Copy()
221 raise RuntimeError(
"target must be of type EntityView/EntityHandle")
224 if target_ligands
is None:
231 LogWarning(
"No ligands in the target")
234 if model_ligands
is None:
241 LogWarning(
"No ligands in the model")
243 raise ValueError(
"No ligand in the model and in the target")
278 iso =
"subgraph isomorphism"
280 iso =
"full graph isomorphism"
284 1: (
"identity", f
"Ligands could not be matched (by {iso})"),
285 2: (
"symmetries",
"Too many symmetries between ligand atoms were "
286 "found - increasing max_symmetries might help"),
287 3: (
"no_iso",
"No fully isomorphic match could be found - enabling "
288 "substructure_match might allow a match"),
289 4: (
"disconnected",
"Ligand graph is disconnected"),
290 5: (
"stoichiometry",
"Ligand was already assigned to another ligand "
291 "(different stoichiometry)"),
292 6: (
"single_ligand_issue",
"Cannot compute valid pairwise score as "
293 "either model or target ligand have non-zero state."),
294 9: (
"unknown",
"An unknown error occured in LigandScorer")}
298 """ Encodes states of ligand pairs
300 Ligand pairs can be matched and a valid score can be expected if
301 respective location in this matrix is 0.
302 Target ligands are in rows, model ligands in columns. States are encoded
303 as integers <= 9. Larger numbers encode errors for child classes.
304 Use something like ``self.state_decoding[3]`` to get a decscription.
306 :rtype: :class:`~numpy.ndarray`
314 """ Encodes states of model ligands
316 Non-zero state in any of the model ligands invalidates the full
317 respective column in :attr:`~state_matrix`.
319 :rtype: :class:`~numpy.ndarray`
327 """ Encodes states of target ligands
329 Non-zero state in any of the target ligands invalidates the full
330 respective row in :attr:`~state_matrix`.
332 :rtype: :class:`~numpy.ndarray`
340 """ Get the matrix of scores.
342 Target ligands are in rows, model ligands in columns.
344 NaN values indicate that no value could be computed (i.e. different
345 ligands). In other words: values are only valid if the respective
346 location in :attr:`~state_matrix` is 0.
348 :rtype: :class:`~numpy.ndarray`
356 """ Get the matrix of model ligand atom coverage in the target.
358 Target ligands are in rows, model ligands in columns.
360 NaN values indicate that no value could be computed (i.e. different
361 ligands). In other words: values are only valid if the respective
362 location in :attr:`~state_matrix` is 0. If `substructure_match=False`,
363 only full match isomorphisms are considered, and therefore only values
364 of 1.0 can be observed.
366 :rtype: :class:`~numpy.ndarray`
374 """ Get the matrix of scorer specific auxiliary data.
376 Target ligands are in rows, model ligands in columns.
378 Auxiliary data consists of arbitrary data dicts which allow a child
379 class to provide additional information for a scored ligand pair.
380 empty dictionaries indicate that the child class simply didn't return
381 anything or that no value could be computed (e.g. different ligands).
382 In other words: values are only valid if respective location in the
383 :attr:`~state_matrix` is 0.
385 :rtype: :class:`~numpy.ndarray`
393 """ Ligand assignment based on computed scores
395 Implements a greedy algorithm to assign target and model ligands
396 with each other. Starts from each valid ligand pair as indicated
397 by a state of 0 in :attr:`state_matrix`. Each iteration first selects
398 high coverage pairs. Given max_coverage defined as the highest
399 coverage observed in the available pairs, all pairs with coverage
400 in [max_coverage-*coverage_delta*, max_coverage] are selected.
401 The best scoring pair among those is added to the assignment
402 and the whole process is repeated until there are no ligands to
405 :rtype: :class:`list` of :class:`tuple` (trg_lig_idx, mdl_lig_idx)
413 for trg_idx
in range(self.
score_matrixscore_matrix.shape[0]):
414 for mdl_idx
in range(self.
score_matrixscore_matrix.shape[1]):
415 if self.
state_matrixstate_matrix[trg_idx, mdl_idx] == 0:
416 tmp.append((self.
score_matrixscore_matrix[trg_idx, mdl_idx],
422 tmp.sort(reverse=
True)
426 raise RuntimeError(
"LigandScorer._score_dir must return one in "
431 coverage_thresh = max([x[1]
for x
in tmp]) - self.
coverage_deltacoverage_delta
432 top_coverage = [x
for x
in tmp
if x[1] >= coverage_thresh]
435 a = top_coverage[0][2]
436 b = top_coverage[0][3]
440 tmp = [x
for x
in tmp
if (x[2] != a
and x[3] != b)]
446 """ Get a dictionary of score values, keyed by model ligand
448 Extract score with something like:
449 ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
450 The returned scores are based on :attr:`~assignment`.
452 :rtype: :class:`dict`
456 for (trg_lig_idx, mdl_lig_idx)
in self.
assignmentassignment:
458 cname = mdl_lig.GetChain().GetName()
459 rnum = mdl_lig.GetNumber()
462 score = self.
score_matrixscore_matrix[trg_lig_idx, mdl_lig_idx]
468 """ Get a dictionary of score details, keyed by model ligand
470 Extract dict with something like:
471 ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
472 The returned info dicts are based on :attr:`~assignment`. The content is
473 documented in the respective child class.
475 :rtype: :class:`dict`
479 for (trg_lig_idx, mdl_lig_idx)
in self.
assignmentassignment:
481 cname = mdl_lig.GetChain().GetName()
482 rnum = mdl_lig.GetNumber()
485 d = self.
aux_matrixaux_matrix[trg_lig_idx, mdl_lig_idx]
491 """ Get indices of target ligands which are not assigned
493 :rtype: :class:`list` of :class:`int`
496 assigned = set([x[0]
for x
in self.
assignmentassignment])
497 return [x
for x
in range(len(self.
target_ligandstarget_ligands))
if x
not in assigned]
501 """ Get indices of model ligands which are not assigned
503 :rtype: :class:`list` of :class:`int`
506 assigned = set([x[1]
for x
in self.
assignmentassignment])
507 return [x
for x
in range(len(self.
model_ligandsmodel_ligands))
if x
not in assigned]
510 """ Get summary of states observed with respect to all model ligands
512 Mainly for debug purposes
514 :param trg_lig_idx: Index of target ligand for which report should be
516 :type trg_lig_idx: :class:`int`
522 """ Get summary of states observed with respect to all target ligands
524 Mainly for debug purposes
526 :param mdl_lig_idx: Index of model ligand for which report should be
528 :type mdl_lig_idx: :class:`int`
533 def _get_report(self, ligand_state, pair_states):
537 for s
in np.unique(pair_states):
539 indices = np.flatnonzero(pair_states == s).tolist()
540 pair_report.append({
"state": s,
541 "short desc": desc[0],
546 ligand_report = {
"state": ligand_state,
547 "short desc": desc[0],
550 return (ligand_report, pair_report)
553 """ Makes an educated guess why target ligand is not assigned
555 This either returns actual error states or custom states that are
558 :param trg_lig_idx: Index of target ligand
559 :type trg_lig_idx: :class:`int`
560 :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
561 sentence describing the issue, (\"unknown\",\"unknown\") if
562 nothing obvious can be found.
563 :raises: :class:`RuntimeError` if specified target ligand is assigned
566 raise RuntimeError(
"Specified target ligand is not unassigned")
570 return (
"no_ligand",
"No ligand in the model")
578 tmp = np.unique(self.
state_matrixstate_matrix[trg_lig_idx,:])
584 return (
"stoichiometry",
585 "Ligand was already assigned to an other "
586 "model ligand (different stoichiometry)")
595 mdl_idx = np.where(self.
state_matrixstate_matrix[trg_lig_idx,:]==6)[0]
600 raise RuntimeError(
"This should never happen")
605 if 6
in tmp
and len(tmp) > 1:
609 if 1
in tmp
and len(tmp) > 1:
617 """ Makes an educated guess why model ligand is not assigned
619 This either returns actual error states or custom states that are
622 :param mdl_lig_idx: Index of model ligand
623 :type mdl_lig_idx: :class:`int`
624 :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
625 sentence describing the issue, (\"unknown\",\"unknown\") if
626 nothing obvious can be found.
627 :raises: :class:`RuntimeError` if specified model ligand is assigned
630 raise RuntimeError(
"Specified model ligand is not unassigned")
634 return (
"no_ligand",
"No ligand in the target")
642 tmp = np.unique(self.
state_matrixstate_matrix[:,mdl_lig_idx])
648 return (
"stoichiometry",
649 "Ligand was already assigned to an other "
650 "model ligand (different stoichiometry)")
659 trg_idx = np.where(self.
state_matrixstate_matrix[:,mdl_lig_idx]==6)[0]
664 raise RuntimeError(
"This should never happen")
669 if 6
in tmp
and len(tmp) > 1:
673 if 1
in tmp
and len(tmp) > 1:
684 cname = lig.GetChain().GetName()
685 rnum = lig.GetNumber()
686 if cname
not in return_dict:
687 return_dict[cname] = dict()
688 return_dict[cname][rnum] = \
697 cname = lig.GetChain().GetName()
698 rnum = lig.GetNumber()
699 if cname
not in return_dict:
700 return_dict[cname] = dict()
701 return_dict[cname][rnum] = \
706 def _chain_mapper(self):
707 """ Chain mapper object for the given :attr:`target`.
709 Can be used by child classes if needed, constructed with
710 *resnum_alignments* flag
712 :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
722 def _extract_ligands(entity):
723 """Extract ligands from entity. Return a list of residues.
725 Assumes that ligands have the :attr:`~ost.mol.ResidueHandle.is_ligand`
726 flag set. This is typically the case for entities loaded from mmCIF
727 (tested with mmCIF files from the PDB and SWISS-MODEL).
728 Legacy PDB files must contain `HET` headers (which is usually the
729 case for files downloaded from the PDB but not elsewhere).
731 This function performs basic checks to ensure that the residues in this
732 chain are not forming polymer bonds (ie peptide/nucleotide ligands) and
733 will raise a RuntimeError if this assumption is broken.
735 :param entity: the entity to extract ligands from
736 :type entity: :class:`~ost.mol.EntityHandle`
737 :rtype: :class:`list` of :class:`~ost.mol.ResidueHandle`
740 extracted_ligands = []
741 for residue
in entity.residues:
742 if residue.is_ligand:
743 if mol.InSequence(residue, residue.next):
744 raise RuntimeError(
"Residue %s connected in polymer sequen"
745 "ce %s" % (residue.qualified_name))
746 extracted_ligands.append(residue)
747 LogVerbose(
"Detected residue %s as ligand" % residue)
748 return extracted_ligands
751 def _prepare_ligands(new_entity, old_entity, ligands, rename_chain):
752 """Prepare the ligands given into a list of ResidueHandles which are
753 part of the copied entity, suitable for the model_ligands and
754 target_ligands properties.
756 This function takes a list of ligands as (Entity|Residue)(Handle|View).
757 Entities can contain multiple ligands, which will be considered as
760 Ligands which are part of the entity are simply fetched in the new
761 copied entity. Otherwise, they are copied over to the copied entity.
763 extracted_ligands = []
768 def _copy_residue(residue, rename_chain):
769 """ Copy the residue into the new chain.
770 Return the new residue handle."""
771 nonlocal next_chain_num, new_editor
774 if new_editor
is None:
775 new_editor = new_entity.EditXCS()
777 new_chain = new_entity.FindChain(residue.chain.name)
778 if not new_chain.IsValid():
779 new_chain = new_editor.InsertChain(residue.chain.name)
780 new_editor.SetChainType(new_chain,
781 mol.ChainType.CHAINTYPE_NON_POLY)
784 already_exists = new_chain.FindResidue(residue.number).IsValid()
790 residue.chain.name +
"_" + str(chain_ext)
791 new_chain = new_entity.FindChain(new_chain_name)
792 if new_chain.IsValid():
797 new_editor.InsertChain(new_chain_name)
799 LogScript(
"Moved ligand residue %s to new chain %s" % (
800 residue.qualified_name, new_chain.name))
803 "A residue number %s already exists in chain %s" % (
804 residue.number, residue.chain.name)
805 raise RuntimeError(msg)
808 new_res = new_editor.AppendResidue(new_chain, residue.name,
811 for old_atom
in residue.atoms:
812 new_editor.InsertAtom(new_res, old_atom.name, old_atom.pos,
813 element=old_atom.element, occupancy=old_atom.occupancy,
814 b_factor=old_atom.b_factor, is_hetatm=old_atom.is_hetatom)
816 for old_atom
in residue.atoms:
817 for old_bond
in old_atom.bonds:
818 new_first = new_res.FindAtom(old_bond.first.name)
819 new_second = new_res.FindAtom(old_bond.second.name)
820 new_editor.Connect(new_first, new_second)
823 def _process_ligand_residue(res, rename_chain):
824 """Copy or fetch the residue. Return the residue handle."""
826 if res.entity.handle == old_entity.handle:
830 new_res = new_entity.FindResidue(res.chain.name, res.number)
831 if new_res
and new_res.valid:
832 qname = res.handle.qualified_name
833 LogVerbose(
"Ligand residue %s already in entity" % qname)
836 new_res = _copy_residue(res, rename_chain)
837 qname = res.handle.qualified_name
838 LogVerbose(
"Copied ligand residue %s" % qname)
839 new_res.SetIsLigand(
True)
842 for ligand
in ligands:
848 for residue
in ligand.residues:
849 new_residue = _process_ligand_residue(residue, rename_chain)
850 extracted_ligands.append(new_residue)
852 new_residue = _process_ligand_residue(ligand, rename_chain)
853 extracted_ligands.append(new_residue)
855 raise RuntimeError(
"Ligands should be given as Entity or "
858 if new_editor
is not None:
859 new_editor.UpdateICS()
860 return extracted_ligands
862 def _compute_scores(self):
864 Compute score for every possible target-model ligand pair and store the
865 result in internal matrices.
871 self.
_score_matrix_score_matrix = np.full(shape, np.nan, dtype=np.float32)
872 self.
_coverage_matrix_coverage_matrix = np.full(shape, np.nan, dtype=np.float32)
873 self.
_state_matrix_state_matrix = np.full(shape, 0, dtype=np.int32)
876 self.
_aux_matrix_aux_matrix = np.empty(shape, dtype=dict)
880 [_ResidueToGraph(l, by_atom_index=
True)
for l
in self.
target_ligandstarget_ligands]
882 [_ResidueToGraph(l, by_atom_index=
True)
for l
in self.
model_ligandsmodel_ligands]
884 for g_idx, g
in enumerate(target_graphs):
885 if not networkx.is_connected(g):
888 msg =
"Disconnected graph observed for target ligand "
892 for g_idx, g
in enumerate(model_graphs):
893 if not networkx.is_connected(g):
896 msg =
"Disconnected graph observed for model ligand "
901 for target_id, target_ligand
in enumerate(self.
target_ligandstarget_ligands):
902 LogVerbose(
"Analyzing target ligand %s" % target_ligand)
909 for model_id, model_ligand
in enumerate(self.
model_ligandsmodel_ligands):
910 LogVerbose(
"Compare to model ligand %s" % model_ligand)
923 model_ligand, target_ligand,
927 model_graph = model_graphs[model_id],
928 target_graph = target_graphs[target_id])
929 LogVerbose(
"Ligands %s and %s symmetry match" % (
930 str(model_ligand), str(target_ligand)))
931 except NoSymmetryError:
933 LogVerbose(
"No symmetry between %s and %s" % (
934 str(model_ligand), str(target_ligand)))
937 except TooManySymmetriesError:
939 LogVerbose(
"Too many symmetries between %s and %s" % (
940 str(model_ligand), str(target_ligand)))
943 except NoIsomorphicSymmetryError:
945 LogVerbose(
"No isomorphic symmetry between %s and %s" % (
946 str(model_ligand), str(target_ligand)))
949 except DisconnectedGraphError:
952 LogVerbose(
"Disconnected graph observed for %s and %s" % (
953 str(model_ligand), str(target_ligand)))
963 score, pair_state, target_ligand_state, model_ligand_state, aux\
964 = self.
_compute_compute(symmetries, target_ligand, model_ligand)
977 raise RuntimeError(f
"Subclass returned state "
978 f
"\"{state}\" for which no "
979 f
"description is available. Point "
980 f
"the developer of the used scorer "
981 f
"to this error message.")
985 if target_ligand_state != 0
and pair_state != 6:
986 raise RuntimeError(
"Observed non-zero target-ligand "
987 "state which must trigger certain "
988 "pair state. Point the developer of "
989 "the used scorer to this error message")
991 if model_ligand_state != 0
and pair_state != 6:
992 raise RuntimeError(
"Observed non-zero model-ligand "
993 "state which must trigger certain "
994 "pair state. Point the developer of "
995 "the used scorer to this error message")
997 self.
_state_matrix_state_matrix[target_id, model_id] = pair_state
1001 if score
is None or np.isnan(score):
1002 raise RuntimeError(
"LigandScorer returned invalid "
1003 "score despite 0 error state")
1005 self.
_score_matrix_score_matrix[target_id, model_id] = score
1006 cvg = len(symmetries[0][0]) / len(model_ligand.atoms)
1008 self.
_aux_matrix_aux_matrix[target_id, model_id] = aux
1010 def _compute(self, symmetries, target_ligand, model_ligand):
1011 """ Compute score for specified ligand pair - defined by child class
1013 Raises :class:`NotImplementedError` if not implemented by child class.
1015 :param symmetries: Defines symmetries between *target_ligand* and
1016 *model_ligand*. Return value of
1017 :func:`ComputeSymmetries`
1018 :type symmetries: :class:`list` of :class:`tuple` with two elements
1019 each: 1) :class:`list` of atom indices in
1020 *target_ligand* 2) :class:`list` of respective atom
1021 indices in *model_ligand*
1022 :param target_ligand: The target ligand
1023 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1024 :class:`ost.mol.ResidueView`
1025 :param model_ligand: The model ligand
1026 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1027 :class:`ost.mol.ResidueView`
1029 :returns: A :class:`tuple` with three elements: 1) a score
1030 (:class:`float`) 2) state (:class:`int`).
1031 3) auxiliary data for this ligand pair (:class:`dict`).
1032 If state is 0, the score and auxiliary data will be
1033 added to :attr:`~score_matrix` and :attr:`~aux_matrix` as well
1034 as the respective value in :attr:`~coverage_matrix`.
1035 Returned score must be valid in this case (not None/NaN).
1036 Child specific non-zero states must be >= 10.
1038 raise NotImplementedError(
"_compute must be implemented by child "
1041 def _score_dir(self):
1042 """ Return direction of score - defined by child class
1044 Relevant for ligand assignment. Must return a string in ['+', '-'].
1045 '+' for ascending scores, i.e. higher is better (lddt etc.)
1046 '-' for descending scores, i.e. lower is better (rmsd etc.)
1048 raise NotImplementedError(
"_score_dir must be implemented by child "
1052 def _ResidueToGraph(residue, by_atom_index=False):
1053 """Return a NetworkX graph representation of the residue.
1055 :param residue: the residue from which to derive the graph
1056 :type residue: :class:`ost.mol.ResidueHandle` or
1057 :class:`ost.mol.ResidueView`
1058 :param by_atom_index: Set this parameter to True if you need the nodes to
1059 be labeled by atom index (within the residue).
1060 Otherwise, if False, the nodes will be labeled by
1062 :type by_atom_index: :class:`bool`
1063 :rtype: :class:`~networkx.classes.graph.Graph`
1065 Nodes are labeled with the Atom's uppercase
1066 :attr:`~ost.mol.AtomHandle.element`.
1068 nxg = networkx.Graph()
1070 for atom
in residue.atoms:
1071 nxg.add_node(atom.name, element=atom.element.upper())
1076 nxg.add_edges_from([(
1078 b.second.name)
for a
in residue.atoms
for b
in a.GetBondList()])
1081 nxg = networkx.relabel_nodes(nxg,
1082 {a: b
for a, b
in zip(
1083 [a.name
for a
in residue.atoms],
1084 range(len(residue.atoms)))},
1089 by_atom_index=False, return_symmetries=True,
1090 max_symmetries=1e6, model_graph = None,
1091 target_graph = None):
1092 """Return a list of symmetries (isomorphisms) of the model onto the target
1095 :param model_ligand: The model ligand
1096 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1097 :class:`ost.mol.ResidueView`
1098 :param target_ligand: The target ligand
1099 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1100 :class:`ost.mol.ResidueView`
1101 :param substructure_match: Set this to True to allow partial ligands
1103 :type substructure_match: :class:`bool`
1104 :param by_atom_index: Set this parameter to True if you need the symmetries
1105 to refer to atom index (within the residue).
1106 Otherwise, if False, the symmetries refer to atom
1108 :type by_atom_index: :class:`bool`
1109 :type return_symmetries: If Truthy, return the mappings, otherwise simply
1110 return True if a mapping is found (and raise if
1111 no mapping is found). This is useful to quickly
1112 find out if a mapping exist without the expensive
1113 step to find all the mappings.
1114 :type return_symmetries: :class:`bool`
1115 :param max_symmetries: If more than that many isomorphisms exist, raise
1116 a :class:`TooManySymmetriesError`. This can only be assessed by
1117 generating at least that many isomorphisms and can take some time.
1118 :type max_symmetries: :class:`int`
1119 :raises: :class:`NoSymmetryError` when no symmetry can be found;
1120 :class:`NoIsomorphicSymmetryError` in case of isomorphic
1121 subgraph but *substructure_match* is False;
1122 :class:`TooManySymmetriesError` when more than `max_symmetries`
1123 isomorphisms are found; :class:`DisconnectedGraphError` if
1124 graph for *model_ligand*/*target_ligand* is disconnected.
1128 if model_graph
is None:
1129 model_graph = _ResidueToGraph(model_ligand,
1130 by_atom_index=by_atom_index)
1132 if not networkx.is_connected(model_graph):
1133 msg =
"Disconnected graph for model ligand %s" % model_ligand
1137 if target_graph
is None:
1138 target_graph = _ResidueToGraph(target_ligand,
1139 by_atom_index=by_atom_index)
1141 if not networkx.is_connected(target_graph):
1142 msg =
"Disconnected graph for target ligand %s" % target_ligand
1149 gm = networkx.algorithms.isomorphism.GraphMatcher(
1150 model_graph, target_graph, node_match=
lambda x, y:
1151 x[
"element"] == y[
"element"])
1152 if gm.is_isomorphic():
1153 if not return_symmetries:
1156 for i, isomorphism
in enumerate(gm.isomorphisms_iter()):
1157 if i >= max_symmetries:
1159 "Too many symmetries between %s and %s" % (
1160 str(model_ligand), str(target_ligand)))
1161 symmetries.append((list(isomorphism.values()),
1162 list(isomorphism.keys())))
1163 assert len(symmetries) > 0
1164 LogDebug(
"Found %s isomorphic mappings (symmetries)" % len(symmetries))
1165 elif gm.subgraph_is_isomorphic()
and substructure_match:
1166 if not return_symmetries:
1169 for i, isomorphism
in enumerate(gm.subgraph_isomorphisms_iter()):
1170 if i >= max_symmetries:
1172 "Too many symmetries between %s and %s" % (
1173 str(model_ligand), str(target_ligand)))
1174 symmetries.append((list(isomorphism.values()),
1175 list(isomorphism.keys())))
1176 assert len(symmetries) > 0
1178 assert len(symmetries[0][0]) == len(target_ligand.atoms)
1179 n_sym = len(symmetries)
1180 LogDebug(
"Found %s subgraph isomorphisms (symmetries)" % n_sym)
1181 elif gm.subgraph_is_isomorphic():
1182 LogDebug(
"Found subgraph isomorphisms (symmetries), but"
1183 " ignoring because substructure_match=False")
1185 str(model_ligand), str(target_ligand)))
1187 LogDebug(
"Found no isomorphic mappings (symmetries)")
1189 str(model_ligand), str(target_ligand)))
1194 """ Exception raised when no symmetry can be found.
1198 class NoIsomorphicSymmetryError(ValueError):
1199 """ Exception raised when no isomorphic symmetry can be found
1201 There would be isomorphic subgraphs for which symmetries can
1202 be found, but substructure_match is disabled
1207 """ Exception raised when too many symmetries are found.
1212 """ Exception raised when the ligand graph is disconnected.
1217 __all__ = (
'LigandScorer',
'ComputeSymmetries',
'NoSymmetryError',
1218 'NoIsomorphicSymmetryError',
'TooManySymmetriesError',
1219 'DisconnectedGraphError')
def __init__(self, model, target, model_ligands=None, target_ligands=None, resnum_alignments=False, rename_ligand_chain=False, substructure_match=False, coverage_delta=0.2, max_symmetries=1e5)
def _compute_scores(self)
def coverage_matrix(self)
def guess_target_ligand_unassigned_reason(self, trg_lig_idx)
def _compute(self, symmetries, target_ligand, model_ligand)
def unassigned_target_ligands(self)
def unassigned_model_ligands_reasons(self)
def _prepare_ligands(new_entity, old_entity, ligands, rename_chain)
def model_ligand_states(self)
def unassigned_model_ligands(self)
def _extract_ligands(entity)
def target_ligand_states(self)
def get_target_ligand_state_report(self, trg_lig_idx)
def get_model_ligand_state_report(self, mdl_lig_idx)
def _get_report(self, ligand_state, pair_states)
def guess_model_ligand_unassigned_reason(self, mdl_lig_idx)
def unassigned_target_ligands_reasons(self)
def ComputeSymmetries(model_ligand, target_ligand, substructure_match=False, by_atom_index=False, return_symmetries=True, max_symmetries=1e6, model_graph=None, target_graph=None)