1 from contextlib
import contextmanager
6 from ost
import LogWarning, LogScript, LogInfo, LogVerbose, LogDebug, GetVerbosityLevel, PushVerbosityLevel, PopVerbosityLevel
11 def _SinkVerbosityLevel(n=1):
12 """ Context manager to temporarily reduce the verbosity level by n.
15 with _SinkVerbosityLevel(2):
17 Will only write "Test" in script level (2 above)
29 """ Scorer to compute various small molecule ligand (non polymer) scores.
34 - Python modules `numpy` and `networkx` must be available
35 (e.g. use ``pip install numpy networkx``)
37 :class:`LigandScorer` is an abstract base class dealing with all the setup,
38 data storage, enumerating ligand symmetries and target/model ligand
39 matching/assignment. But actual score computation is delegated to child
42 At the moment, two such classes are available:
44 * :class:`ost.mol.alg.ligand_scoring_lddtpli.LDDTPLIScorer`
45 that assesses the conservation of protein-ligand
47 * :class:`ost.mol.alg.ligand_scoring_scrmsd.SCRMSDScorer`
48 that computes a binding-site superposed, symmetry-corrected RMSD
49 (BiSyRMSD) and ligand pocket lDDT (lDDT-LP).
51 All versus all scores are available through the lazily computed
52 :attr:`score_matrix`. However, many things can go wrong... be it even
53 something as simple as two ligands not matching. Error states therefore
54 encode scoring issues. An Issue for a particular ligand is indicated by a
55 non-zero state in :attr:`model_ligand_states`/:attr:`target_ligand_states`.
56 This invalidates pairwise scores of such a ligand with all other ligands.
57 This and other issues in pairwise score computation are reported in
58 :attr:`state_matrix` which has the same size as :attr:`score_matrix`.
59 Only if the respective location is 0, a valid pairwise score can be
60 expected. The states and their meaning can be explored with code::
62 for state_code, (short_desc, desc) in scorer_obj.state_decoding.items():
67 A common use case is to derive a one-to-one mapping between ligands in
68 the model and the target for which :class:`LigandScorer` provides an
69 automated :attr:`assignment` procedure.
70 By default, only exact matches between target and model ligands are
71 considered. This is a problem when the target only contains a subset
72 of the expected atoms (for instance if atoms are missing in an
73 experimental structure, which often happens in the PDB). With
74 `substructure_match=True`, complete model ligands can be scored against
75 partial target ligands. One problem with this approach is that it is
76 very easy to find good matches to small, irrelevant ligands like EDO, CO2
77 or GOL. The assignment algorithm therefore considers the coverage,
78 expressed as the fraction of atoms of the model ligand atoms covered in the
79 target. Higher coverage matches are prioritized, but a match with a better
80 score will be preferred if it falls within a window of `coverage_delta`
81 (by default 0.2) of a worse-scoring match. As a result, for instance,
82 with a delta of 0.2, a low-score match with coverage 0.96 would be
83 preferred over a high-score match with coverage 0.70.
87 :class:`LigandScorer` generally assumes that the
88 :attr:`~ost.mol.ResidueHandle.is_ligand` property is properly set on all
89 the ligand residues, and only ligand atoms. This is typically the case for
90 entities loaded from mmCIF (tested with mmCIF files from the PDB and
91 SWISS-MODEL). Legacy PDB files must contain `HET` headers (which is usually
92 the case for files downloaded from the PDB but not elsewhere).
94 The class doesn't perform any cleanup of the provided structures.
95 It is up to the caller to ensure that the data is clean and suitable for
96 scoring. :ref:`Molck <molck>` should be used with extra
97 care, as many of the options (such as `rm_non_std` or `map_nonstd_res`) can
98 cause ligands to be removed from the structure. If cleanup with Molck is
99 needed, ligands should be kept aside and passed separately. Non-ligand
100 residues should be valid compounds with atom names following the naming
101 conventions of the component dictionary. Non-standard residues are
102 acceptable, and if the model contains a standard residue at that position,
103 only atoms with matching names will be considered.
105 Unlike most of OpenStructure, this class does not assume that the ligands
106 (either in the model or the target) are part of the PDB component
107 dictionary. They may have arbitrary residue names. Residue names do not
108 have to match between the model and the target. Matching is based on
109 the calculation of isomorphisms which depend on the atom element name and
110 atom connectivity (bond order is ignored).
111 It is up to the caller to ensure that the connectivity of atoms is properly
112 set before passing any ligands to this class. Ligands with improper
113 connectivity will lead to bogus results.
115 Note, however, that atom names should be unique within a residue (ie two
116 distinct atoms cannot have the same atom name).
118 This only applies to the ligand. The rest of the model and target
119 structures (protein, nucleic acids) must still follow the usual rules and
120 contain only residues from the compound library.
122 Although it isn't a requirement, hydrogen atoms should be removed from the
123 structures. Here is an example code snippet that will perform a reasonable
124 cleanup. Keep in mind that this is most likely not going to work as
125 expected with entities loaded from PDB files, as the `is_ligand` flag is
126 probably not set properly.
128 Here is an example of how to use setup a scorer code::
130 from ost.mol.alg.ligand_scoring_scrmsd import SCRMSDScorer
131 from ost.mol.alg import Molck, MolckSettings
134 # Structure model in PDB format, containing the receptor only
135 model = io.LoadPDB("path_to_model.pdb")
136 # Ligand model as SDF file
137 model_ligand = io.LoadEntity("path_to_ligand.sdf", format="sdf")
138 # Target loaded from mmCIF, containing the ligand
139 target = io.LoadMMCIF("path_to_target.cif")
141 # Cleanup a copy of the structures
142 cleaned_model = model.Copy()
143 cleaned_target = target.Copy()
144 molck_settings = MolckSettings(rm_unk_atoms=True,
148 rm_zero_occ_atoms=False,
150 map_nonstd_res=False,
152 Molck(cleaned_model, conop.GetDefaultLib(), molck_settings)
153 Molck(cleaned_target, conop.GetDefaultLib(), molck_settings)
155 # Setup scorer object and compute lDDT-PLI
156 model_ligands = [model_ligand.Select("ele != H")]
157 sc = SCRMSDScorer(cleaned_model, cleaned_target, model_ligands)
159 # Perform assignment and read respective scores
160 for lig_pair in sc.assignment:
161 trg_lig = sc.target_ligands[lig_pair[0]]
162 mdl_lig = sc.model_ligands[lig_pair[1]]
163 score = sc.score_matrix[lig_pair[0], lig_pair[1]]
164 print(f"Score for {trg_lig} and {mdl_lig}: {score}")
166 :param model: Model structure - a deep copy is available as :attr:`model`.
167 No additional processing (ie. Molck), checks,
168 stereochemistry checks or sanitization is performed on the
169 input. Hydrogen atoms are kept.
170 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
171 :param target: Target structure - a deep copy is available as
172 :attr:`target`. No additional processing (ie. Molck), checks
173 or sanitization is performed on the input. Hydrogen atoms are
175 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
176 :param model_ligands: Model ligands, as a list of
177 :class:`~ost.mol.ResidueHandle` belonging to the model
178 entity. Can be instantiated with either a :class:list
179 of :class:`~ost.mol.ResidueHandle`/
180 :class:`ost.mol.ResidueView` or of
181 :class:`ost.mol.EntityHandle`/
182 :class:`ost.mol.EntityView`.
183 If `None`, ligands will be extracted based on the
184 :attr:`~ost.mol.ResidueHandle.is_ligand` flag (this is
185 normally set properly in entities loaded from mmCIF).
186 :type model_ligands: :class:`list`
187 :param target_ligands: Target ligands, as a list of
188 :class:`~ost.mol.ResidueHandle` belonging to the
189 target entity. Can be instantiated either a
190 :class:list of :class:`~ost.mol.ResidueHandle`/
191 :class:`ost.mol.ResidueView` or of
192 :class:`ost.mol.EntityHandle`/
193 :class:`ost.mol.EntityView` containing a single
194 residue each. If `None`, ligands will be extracted
195 based on the :attr:`~ost.mol.ResidueHandle.is_ligand`
196 flag (this is normally set properly in entities
198 :type target_ligands: :class:`list`
199 :param resnum_alignments: Whether alignments between chemically equivalent
200 chains in *model* and *target* can be computed
201 based on residue numbers. This can be assumed in
202 benchmarking setups such as CAMEO/CASP.
203 :type resnum_alignments: :class:`bool`
204 :param rename_ligand_chain: If a residue with the same chain name and
205 residue number than an explicitly passed model
206 or target ligand exits in the structure,
207 and `rename_ligand_chain` is False, a
208 RuntimeError will be raised. If
209 `rename_ligand_chain` is True, the ligand will
210 be moved to a new chain instead, and the move
211 will be logged to the console with SCRIPT
213 :type rename_ligand_chain: :class:`bool`
214 :param substructure_match: Set this to True to allow incomplete (i.e.
215 partially resolved) target ligands.
216 :type substructure_match: :class:`bool`
217 :param coverage_delta: the coverage delta for partial ligand assignment.
218 :type coverage_delta: :class:`float`
219 :param max_symmetries: If more than that many isomorphisms exist for
220 a target-ligand pair, it will be ignored and reported
222 :type max_symmetries: :class:`int`
225 def __init__(self, model, target, model_ligands=None, target_ligands=None,
226 resnum_alignments=False, rename_ligand_chain=False,
227 substructure_match=False, coverage_delta=0.2,
231 self.
modelmodel = mol.CreateEntityFromView(model,
False)
233 self.
modelmodel = model.Copy()
235 raise RuntimeError(
"model must be of type EntityView/EntityHandle")
238 self.
targettarget = mol.CreateEntityFromView(target,
False)
240 self.
targettarget = target.Copy()
242 raise RuntimeError(
"target must be of type EntityView/EntityHandle")
245 if target_ligands
is None:
252 LogWarning(
"No ligands in the target")
255 if model_ligands
is None:
262 LogWarning(
"No ligands in the model")
264 raise ValueError(
"No ligand in the model and in the target")
299 iso =
"subgraph isomorphism"
301 iso =
"full graph isomorphism"
305 1: (
"identity", f
"Ligands could not be matched (by {iso})"),
306 2: (
"symmetries",
"Too many symmetries between ligand atoms were "
307 "found - increasing max_symmetries might help"),
308 3: (
"no_iso",
"No full isomorphic match could be found - enabling "
309 "substructure_match might allow a match"),
310 4: (
"disconnected",
"Ligand graph is disconnected"),
311 5: (
"stoichiometry",
"Ligand was already assigned to another ligand "
312 "(different stoichiometry)"),
313 6: (
"single_ligand_issue",
"Cannot compute valid pairwise score as "
314 "either model or target ligand have non-zero state."),
315 9: (
"unknown",
"An unknown error occured in LigandScorer")}
319 """ Encodes states of ligand pairs
321 Ligand pairs can be matched and a valid score can be expected if
322 respective location in this matrix is 0.
323 Target ligands are in rows, model ligands in columns. States are encoded
324 as integers <= 9. Larger numbers encode errors for child classes.
325 Use something like ``self.state_decoding[3]`` to get a decscription.
327 :rtype: :class:`~numpy.ndarray`
335 """ Encodes states of model ligands
337 Non-zero state in any of the model ligands invalidates the full
338 respective column in :attr:`~state_matrix`.
340 :rtype: :class:`~numpy.ndarray`
348 """ Encodes states of target ligands
350 Non-zero state in any of the target ligands invalidates the full
351 respective row in :attr:`~state_matrix`.
353 :rtype: :class:`~numpy.ndarray`
361 """ Get the matrix of scores.
363 Target ligands are in rows, model ligands in columns.
365 NaN values indicate that no value could be computed (i.e. different
366 ligands). In other words: values are only valid if the respective
367 location in :attr:`~state_matrix` is 0.
369 :rtype: :class:`~numpy.ndarray`
377 """ Get the matrix of model ligand atom coverage in the target.
379 Target ligands are in rows, model ligands in columns.
381 NaN values indicate that no value could be computed (i.e. different
382 ligands). In other words: values are only valid if the respective
383 location in :attr:`~state_matrix` is 0. If `substructure_match=False`,
384 only full match isomorphisms are considered, and therefore only values
385 of 1.0 can be observed.
387 :rtype: :class:`~numpy.ndarray`
395 """ Get the matrix of scorer specific auxiliary data.
397 Target ligands are in rows, model ligands in columns.
399 Auxiliary data consists of arbitrary data dicts which allow a child
400 class to provide additional information for a scored ligand pair.
401 empty dictionaries indicate that the child class simply didn't return
402 anything or that no value could be computed (e.g. different ligands).
403 In other words: values are only valid if respective location in the
404 :attr:`~state_matrix` is 0.
406 :rtype: :class:`~numpy.ndarray`
414 """ Ligand assignment based on computed scores
416 Implements a greedy algorithm to assign target and model ligands
417 with each other. Starts from each valid ligand pair as indicated
418 by a state of 0 in :attr:`state_matrix`. Each iteration first selects
419 high coverage pairs. Given max_coverage defined as the highest
420 coverage observed in the available pairs, all pairs with coverage
421 in [max_coverage-*coverage_delta*, max_coverage] are selected.
422 The best scoring pair among those is added to the assignment
423 and the whole process is repeated until there are no ligands to
426 :rtype: :class:`list` of :class:`tuple` (trg_lig_idx, mdl_lig_idx)
434 for trg_idx
in range(self.
score_matrixscore_matrix.shape[0]):
435 for mdl_idx
in range(self.
score_matrixscore_matrix.shape[1]):
436 if self.
state_matrixstate_matrix[trg_idx, mdl_idx] == 0:
437 tmp.append((self.
score_matrixscore_matrix[trg_idx, mdl_idx],
443 tmp.sort(reverse=
True)
447 raise RuntimeError(
"LigandScorer._score_dir must return one in "
450 LogScript(
"Computing ligand assignment")
453 coverage_thresh = max([x[1]
for x
in tmp]) - self.
coverage_deltacoverage_delta
454 top_coverage = [x
for x
in tmp
if x[1] >= coverage_thresh]
457 a = top_coverage[0][2]
458 b = top_coverage[0][3]
462 tmp = [x
for x
in tmp
if (x[2] != a
and x[3] != b)]
468 """ Get a dictionary of score values, keyed by model ligand
470 Extract score with something like:
471 ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
472 The returned scores are based on :attr:`~assignment`.
474 :rtype: :class:`dict`
478 for (trg_lig_idx, mdl_lig_idx)
in self.
assignmentassignment:
480 cname = mdl_lig.GetChain().GetName()
481 rnum = mdl_lig.GetNumber()
484 score = self.
score_matrixscore_matrix[trg_lig_idx, mdl_lig_idx]
490 """ Get a dictionary of score details, keyed by model ligand
492 Extract dict with something like:
493 ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
494 The returned info dicts are based on :attr:`~assignment`. The content is
495 documented in the respective child class.
497 :rtype: :class:`dict`
501 for (trg_lig_idx, mdl_lig_idx)
in self.
assignmentassignment:
503 cname = mdl_lig.GetChain().GetName()
504 rnum = mdl_lig.GetNumber()
507 d = self.
aux_matrixaux_matrix[trg_lig_idx, mdl_lig_idx]
513 """ Get indices of target ligands which are not assigned
515 :rtype: :class:`list` of :class:`int`
518 assigned = set([x[0]
for x
in self.
assignmentassignment])
519 return [x
for x
in range(len(self.
target_ligandstarget_ligands))
if x
not in assigned]
523 """ Get indices of model ligands which are not assigned
525 :rtype: :class:`list` of :class:`int`
528 assigned = set([x[1]
for x
in self.
assignmentassignment])
529 return [x
for x
in range(len(self.
model_ligandsmodel_ligands))
if x
not in assigned]
532 """ Get summary of states observed with respect to all model ligands
534 Mainly for debug purposes
536 :param trg_lig_idx: Index of target ligand for which report should be
538 :type trg_lig_idx: :class:`int`
544 """ Get summary of states observed with respect to all target ligands
546 Mainly for debug purposes
548 :param mdl_lig_idx: Index of model ligand for which report should be
550 :type mdl_lig_idx: :class:`int`
555 def _get_report(self, ligand_state, pair_states):
559 for s
in np.unique(pair_states):
561 indices = np.flatnonzero(pair_states == s).tolist()
562 pair_report.append({
"state": s,
563 "short desc": desc[0],
568 ligand_report = {
"state": ligand_state,
569 "short desc": desc[0],
572 return (ligand_report, pair_report)
575 """ Makes an educated guess why target ligand is not assigned
577 This either returns actual error states or custom states that are
578 derived from them. Currently, the following reasons are reported:
580 * `no_ligand`: there was no ligand in the model.
581 * `disconnected`: the ligand graph was disconnected.
582 * `identity`: the ligand was not found in the model (by graph
583 isomorphism). Check your ligand connectivity.
584 * `no_iso`: no full isomorphic match could be found. Try enabling
585 `substructure_match=True` if the target ligand is incomplete.
586 * `symmetries`: too many symmetries were found (by graph isomorphisms).
587 Try to increase `max_symmetries`.
588 * `stoichiometry`: there was a possible assignment in the model, but
589 the model ligand was already assigned to a different target ligand.
590 This indicates different stoichiometries.
591 * `no_contact` (lDDT-PLI only): There were no lDDT contacts between
592 the binding site and the ligand, and lDDT-PLI is undefined.
593 * `target_binding_site` (SCRMSD only): no polymer residues were in
594 proximity of the target ligand.
595 * `model_binding_site` (SCRMSD only): the binding site was not found
596 in the model. Either the binding site was not modeled or the model
597 ligand was positioned too far in combination with
598 `full_bs_search=False`.
600 :param trg_lig_idx: Index of target ligand
601 :type trg_lig_idx: :class:`int`
602 :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
603 sentence describing the issue, (\"unknown\",\"unknown\") if
604 nothing obvious can be found.
605 :raises: :class:`RuntimeError` if specified target ligand is assigned
608 raise RuntimeError(
"Specified target ligand is not unassigned")
612 return (
"no_ligand",
"No ligand in the model")
620 tmp = np.unique(self.
state_matrixstate_matrix[trg_lig_idx,:])
626 return (
"stoichiometry",
627 "Ligand was already assigned to an other "
628 "model ligand (different stoichiometry)")
637 mdl_idx = np.where(self.
state_matrixstate_matrix[trg_lig_idx,:]==6)[0]
640 raise RuntimeError(
"This should never happen")
647 if 6
in tmp
and len(tmp) > 1:
651 if 1
in tmp
and len(tmp) > 1:
659 """ Makes an educated guess why model ligand is not assigned
661 This either returns actual error states or custom states that are
662 derived from them. Currently, the following reasons are reported:
664 * `no_ligand`: there was no ligand in the target.
665 * `disconnected`: the ligand graph is disconnected.
666 * `identity`: the ligand was not found in the target (by graph or
667 subgraph isomorphism). Check your ligand connectivity.
668 * `no_iso`: no full isomorphic match could be found. Try enabling
669 `substructure_match=True` if the target ligand is incomplete.
670 * `symmetries`: too many symmetries were found (by graph isomorphisms).
671 Try to increase `max_symmetries`.
672 * `stoichiometry`: there was a possible assignment in the target, but
673 the model target was already assigned to a different model ligand.
674 This indicates different stoichiometries.
675 * `no_contact` (lDDT-PLI only): There were no lDDT contacts between
676 the binding site and the ligand, and lDDT-PLI is undefined.
677 * `target_binding_site` (SCRMSD only): a potential assignment was found
678 in the target, but there were no polymer residues in proximity of the
679 ligand in the target.
680 * `model_binding_site` (SCRMSD only): a potential assignment was
681 found in the target, but no binding site was found in the model.
682 Either the binding site was not modeled or the model ligand was
683 positioned too far in combination with `full_bs_search=False`.
685 :param mdl_lig_idx: Index of model ligand
686 :type mdl_lig_idx: :class:`int`
687 :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
688 sentence describing the issue, (\"unknown\",\"unknown\") if
689 nothing obvious can be found.
690 :raises: :class:`RuntimeError` if specified model ligand is assigned
693 raise RuntimeError(
"Specified model ligand is not unassigned")
697 return (
"no_ligand",
"No ligand in the target")
705 tmp = np.unique(self.
state_matrixstate_matrix[:,mdl_lig_idx])
711 return (
"stoichiometry",
712 "Ligand was already assigned to an other "
713 "model ligand (different stoichiometry)")
722 trg_idx = np.where(self.
state_matrixstate_matrix[:,mdl_lig_idx]==6)[0]
725 raise RuntimeError(
"This should never happen")
732 if 6
in tmp
and len(tmp) > 1:
736 if 1
in tmp
and len(tmp) > 1:
747 cname = lig.GetChain().GetName()
748 rnum = lig.GetNumber()
749 if cname
not in return_dict:
750 return_dict[cname] = dict()
751 return_dict[cname][rnum] = \
760 cname = lig.GetChain().GetName()
761 rnum = lig.GetNumber()
762 if cname
not in return_dict:
763 return_dict[cname] = dict()
764 return_dict[cname][rnum] = \
769 def _chain_mapper(self):
770 """ Chain mapper object for the given :attr:`target`.
772 Can be used by child classes if needed, constructed with
773 *resnum_alignments* flag
775 :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
778 with _SinkVerbosityLevel():
786 def _extract_ligands(entity):
787 """Extract ligands from entity. Return a list of residues.
789 Assumes that ligands have the :attr:`~ost.mol.ResidueHandle.is_ligand`
790 flag set. This is typically the case for entities loaded from mmCIF
791 (tested with mmCIF files from the PDB and SWISS-MODEL).
792 Legacy PDB files must contain `HET` headers (which is usually the
793 case for files downloaded from the PDB but not elsewhere).
795 This function performs basic checks to ensure that the residues in this
796 chain are not forming polymer bonds (ie peptide/nucleotide ligands) and
797 will raise a RuntimeError if this assumption is broken.
799 :param entity: the entity to extract ligands from
800 :type entity: :class:`~ost.mol.EntityHandle`
801 :rtype: :class:`list` of :class:`~ost.mol.ResidueHandle`
804 extracted_ligands = []
805 for residue
in entity.residues:
806 if residue.is_ligand:
807 if mol.InSequence(residue, residue.next):
808 raise RuntimeError(
"Residue %s connected in polymer sequen"
809 "ce %s" % (residue.qualified_name))
810 extracted_ligands.append(residue)
811 LogVerbose(
"Detected residue %s as ligand" % residue)
812 return extracted_ligands
815 def _prepare_ligands(new_entity, old_entity, ligands, rename_chain):
816 """Prepare the ligands given into a list of ResidueHandles which are
817 part of the copied entity, suitable for the model_ligands and
818 target_ligands properties.
820 This function takes a list of ligands as (Entity|Residue)(Handle|View).
821 Entities can contain multiple ligands, which will be considered as
824 Ligands which are part of the entity are simply fetched in the new
825 copied entity. Otherwise, they are copied over to the copied entity.
827 extracted_ligands = []
832 def _copy_residue(residue, rename_chain):
833 """ Copy the residue into the new chain.
834 Return the new residue handle."""
835 nonlocal next_chain_num, new_editor
838 if new_editor
is None:
839 new_editor = new_entity.EditXCS(mol.BUFFERED_EDIT)
841 new_chain = new_entity.FindChain(residue.chain.name)
842 if not new_chain.IsValid():
843 new_chain = new_editor.InsertChain(residue.chain.name)
844 new_editor.SetChainType(new_chain,
845 mol.ChainType.CHAINTYPE_NON_POLY)
848 already_exists = new_chain.FindResidue(residue.number).IsValid()
854 residue.chain.name +
"_" + str(chain_ext)
855 new_chain = new_entity.FindChain(new_chain_name)
856 if new_chain.IsValid():
861 new_editor.InsertChain(new_chain_name)
863 LogInfo(
"Moved ligand residue %s to new chain %s" % (
864 residue.qualified_name, new_chain.name))
867 "A residue number %s already exists in chain %s" % (
868 residue.number, residue.chain.name)
869 raise RuntimeError(msg)
872 new_res = new_editor.AppendResidue(new_chain, residue.name,
875 for old_atom
in residue.atoms:
876 new_editor.InsertAtom(new_res, old_atom.name, old_atom.pos,
877 element=old_atom.element, occupancy=old_atom.occupancy,
878 b_factor=old_atom.b_factor, is_hetatm=old_atom.is_hetatom)
880 for old_atom
in residue.atoms:
881 for old_bond
in old_atom.bonds:
882 new_first = new_res.FindAtom(old_bond.first.name)
883 new_second = new_res.FindAtom(old_bond.second.name)
884 new_editor.Connect(new_first, new_second)
887 def _process_ligand_residue(res, rename_chain):
888 """Copy or fetch the residue. Return the residue handle."""
890 if res.entity.handle == old_entity.handle:
894 new_res = new_entity.FindResidue(res.chain.name, res.number)
895 if new_res
and new_res.valid:
896 qname = res.handle.qualified_name
897 LogVerbose(
"Ligand residue %s already in entity" % qname)
900 new_res = _copy_residue(res, rename_chain)
901 qname = res.handle.qualified_name
902 LogVerbose(
"Copied ligand residue %s" % qname)
903 new_res.SetIsLigand(
True)
906 for ligand
in ligands:
912 for residue
in ligand.residues:
913 new_residue = _process_ligand_residue(residue, rename_chain)
914 extracted_ligands.append(new_residue)
916 new_residue = _process_ligand_residue(ligand, rename_chain)
917 extracted_ligands.append(new_residue)
919 raise RuntimeError(
"Ligands should be given as Entity or "
922 if new_editor
is not None:
923 new_editor.UpdateICS()
924 return extracted_ligands
926 def _compute_scores(self):
928 Compute score for every possible target-model ligand pair and store the
929 result in internal matrices.
935 self.
_score_matrix_score_matrix = np.full(shape, np.nan, dtype=np.float32)
936 self.
_coverage_matrix_coverage_matrix = np.full(shape, np.nan, dtype=np.float32)
937 self.
_state_matrix_state_matrix = np.full(shape, 0, dtype=np.int32)
940 self.
_aux_matrix_aux_matrix = np.empty(shape, dtype=dict)
944 [_ResidueToGraph(l, by_atom_index=
True)
for l
in self.
target_ligandstarget_ligands]
946 [_ResidueToGraph(l, by_atom_index=
True)
for l
in self.
model_ligandsmodel_ligands]
948 for g_idx, g
in enumerate(target_graphs):
949 if not networkx.is_connected(g):
952 msg =
"Disconnected graph observed for target ligand "
956 for g_idx, g
in enumerate(model_graphs):
957 if not networkx.is_connected(g):
960 msg =
"Disconnected graph observed for model ligand "
965 LogScript(
"Computing pairwise scores for all %s x %s ligands" % shape)
966 for target_id, target_ligand
in enumerate(self.
target_ligandstarget_ligands):
967 LogInfo(
"Analyzing target ligand %s" % target_ligand)
974 for model_id, model_ligand
in enumerate(self.
model_ligandsmodel_ligands):
975 LogInfo(
"Comparing to model ligand %s" % model_ligand)
988 model_ligand, target_ligand,
992 model_graph = model_graphs[model_id],
993 target_graph = target_graphs[target_id])
994 LogInfo(
"Ligands %s and %s symmetry match" % (
995 str(model_ligand), str(target_ligand)))
996 except NoSymmetryError:
998 LogInfo(
"No symmetry between %s and %s" % (
999 str(model_ligand), str(target_ligand)))
1002 except TooManySymmetriesError:
1004 LogWarning(
"Too many symmetries between %s and %s" % (
1005 str(model_ligand), str(target_ligand)))
1008 except NoIsomorphicSymmetryError:
1010 LogInfo(
"No isomorphic symmetry between %s and %s" % (
1011 str(model_ligand), str(target_ligand)))
1014 except DisconnectedGraphError:
1017 LogError(
"Disconnected graph observed for %s and %s" % (
1018 str(model_ligand), str(target_ligand)))
1028 score, pair_state, target_ligand_state, model_ligand_state, aux\
1029 = self.
_compute_compute(symmetries, target_ligand, model_ligand)
1040 target_ligand_state
not in self.
state_decodingstate_decoding
or \
1042 raise RuntimeError(f
"Subclass returned state "
1043 f
"\"{state}\" for which no "
1044 f
"description is available. Point "
1045 f
"the developer of the used scorer "
1046 f
"to this error message.")
1050 if target_ligand_state != 0
and pair_state != 6:
1051 raise RuntimeError(
"Observed non-zero target-ligand "
1052 "state which must trigger certain "
1053 "pair state. Point the developer of "
1054 "the used scorer to this error message")
1056 if model_ligand_state != 0
and pair_state != 6:
1057 raise RuntimeError(
"Observed non-zero model-ligand "
1058 "state which must trigger certain "
1059 "pair state. Point the developer of "
1060 "the used scorer to this error message")
1062 self.
_state_matrix_state_matrix[target_id, model_id] = pair_state
1066 if score
is None or np.isnan(score):
1067 raise RuntimeError(
"LigandScorer returned invalid "
1068 "score despite 0 error state")
1070 self.
_score_matrix_score_matrix[target_id, model_id] = score
1071 cvg = len(symmetries[0][0]) / len(model_ligand.atoms)
1073 self.
_aux_matrix_aux_matrix[target_id, model_id] = aux
1075 def _compute(self, symmetries, target_ligand, model_ligand):
1076 """ Compute score for specified ligand pair - defined by child class
1078 Raises :class:`NotImplementedError` if not implemented by child class.
1080 :param symmetries: Defines symmetries between *target_ligand* and
1081 *model_ligand*. Return value of
1082 :func:`ComputeSymmetries`
1083 :type symmetries: :class:`list` of :class:`tuple` with two elements
1084 each: 1) :class:`list` of atom indices in
1085 *target_ligand* 2) :class:`list` of respective atom
1086 indices in *model_ligand*
1087 :param target_ligand: The target ligand
1088 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1089 :class:`ost.mol.ResidueView`
1090 :param model_ligand: The model ligand
1091 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1092 :class:`ost.mol.ResidueView`
1094 :returns: A :class:`tuple` with three elements: 1) a score
1095 (:class:`float`) 2) state (:class:`int`).
1096 3) auxiliary data for this ligand pair (:class:`dict`).
1097 If state is 0, the score and auxiliary data will be
1098 added to :attr:`~score_matrix` and :attr:`~aux_matrix` as well
1099 as the respective value in :attr:`~coverage_matrix`.
1100 Returned score must be valid in this case (not None/NaN).
1101 Child specific non-zero states must be >= 10.
1103 raise NotImplementedError(
"_compute must be implemented by child "
1106 def _score_dir(self):
1107 """ Return direction of score - defined by child class
1109 Relevant for ligand assignment. Must return a string in ['+', '-'].
1110 '+' for ascending scores, i.e. higher is better (lddt etc.)
1111 '-' for descending scores, i.e. lower is better (rmsd etc.)
1113 raise NotImplementedError(
"_score_dir must be implemented by child "
1117 def _ResidueToGraph(residue, by_atom_index=False):
1118 """Return a NetworkX graph representation of the residue.
1120 :param residue: the residue from which to derive the graph
1121 :type residue: :class:`ost.mol.ResidueHandle` or
1122 :class:`ost.mol.ResidueView`
1123 :param by_atom_index: Set this parameter to True if you need the nodes to
1124 be labeled by atom index (within the residue).
1125 Otherwise, if False, the nodes will be labeled by
1127 :type by_atom_index: :class:`bool`
1128 :rtype: :class:`~networkx.classes.graph.Graph`
1130 Nodes are labeled with the Atom's uppercase
1131 :attr:`~ost.mol.AtomHandle.element`.
1133 nxg = networkx.Graph()
1135 for atom
in residue.atoms:
1136 nxg.add_node(atom.name, element=atom.element.upper())
1141 nxg.add_edges_from([(
1143 b.second.name)
for a
in residue.atoms
for b
in a.GetBondList()])
1146 nxg = networkx.relabel_nodes(nxg,
1147 {a: b
for a, b
in zip(
1148 [a.name
for a
in residue.atoms],
1149 range(len(residue.atoms)))},
1154 by_atom_index=False, return_symmetries=True,
1155 max_symmetries=1e6, model_graph = None,
1156 target_graph = None):
1157 """Return a list of symmetries (isomorphisms) of the model onto the target
1160 :param model_ligand: The model ligand
1161 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1162 :class:`ost.mol.ResidueView`
1163 :param target_ligand: The target ligand
1164 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1165 :class:`ost.mol.ResidueView`
1166 :param substructure_match: Set this to True to allow partial ligands
1168 :type substructure_match: :class:`bool`
1169 :param by_atom_index: Set this parameter to True if you need the symmetries
1170 to refer to atom index (within the residue).
1171 Otherwise, if False, the symmetries refer to atom
1173 :type by_atom_index: :class:`bool`
1174 :type return_symmetries: If Truthy, return the mappings, otherwise simply
1175 return True if a mapping is found (and raise if
1176 no mapping is found). This is useful to quickly
1177 find out if a mapping exist without the expensive
1178 step to find all the mappings.
1179 :type return_symmetries: :class:`bool`
1180 :param max_symmetries: If more than that many isomorphisms exist, raise
1181 a :class:`TooManySymmetriesError`. This can only be assessed by
1182 generating at least that many isomorphisms and can take some time.
1183 :type max_symmetries: :class:`int`
1184 :raises: :class:`NoSymmetryError` when no symmetry can be found;
1185 :class:`NoIsomorphicSymmetryError` in case of isomorphic
1186 subgraph but *substructure_match* is False;
1187 :class:`TooManySymmetriesError` when more than `max_symmetries`
1188 isomorphisms are found; :class:`DisconnectedGraphError` if
1189 graph for *model_ligand*/*target_ligand* is disconnected.
1193 if model_graph
is None:
1194 model_graph = _ResidueToGraph(model_ligand,
1195 by_atom_index=by_atom_index)
1197 if not networkx.is_connected(model_graph):
1198 msg =
"Disconnected graph for model ligand %s" % model_ligand
1202 if target_graph
is None:
1203 target_graph = _ResidueToGraph(target_ligand,
1204 by_atom_index=by_atom_index)
1206 if not networkx.is_connected(target_graph):
1207 msg =
"Disconnected graph for target ligand %s" % target_ligand
1214 gm = networkx.algorithms.isomorphism.GraphMatcher(
1215 model_graph, target_graph, node_match=
lambda x, y:
1216 x[
"element"] == y[
"element"])
1217 if gm.is_isomorphic():
1218 if not return_symmetries:
1221 for i, isomorphism
in enumerate(gm.isomorphisms_iter()):
1222 if i >= max_symmetries:
1224 "Too many symmetries between %s and %s" % (
1225 str(model_ligand), str(target_ligand)))
1226 symmetries.append((list(isomorphism.values()),
1227 list(isomorphism.keys())))
1228 assert len(symmetries) > 0
1229 LogVerbose(
"Found %s isomorphic mappings (symmetries)" % len(symmetries))
1230 elif gm.subgraph_is_isomorphic()
and substructure_match:
1231 if not return_symmetries:
1234 for i, isomorphism
in enumerate(gm.subgraph_isomorphisms_iter()):
1235 if i >= max_symmetries:
1237 "Too many symmetries between %s and %s" % (
1238 str(model_ligand), str(target_ligand)))
1239 symmetries.append((list(isomorphism.values()),
1240 list(isomorphism.keys())))
1241 assert len(symmetries) > 0
1243 assert len(symmetries[0][0]) == len(target_ligand.atoms)
1244 n_sym = len(symmetries)
1245 LogVerbose(
"Found %s subgraph isomorphisms (symmetries)" % n_sym)
1246 elif gm.subgraph_is_isomorphic():
1247 LogVerbose(
"Found subgraph isomorphisms (symmetries), but"
1248 " ignoring because substructure_match=False")
1250 str(model_ligand), str(target_ligand)))
1252 LogVerbose(
"Found no isomorphic mappings (symmetries)")
1254 str(model_ligand), str(target_ligand)))
1259 """ Exception raised when no symmetry can be found.
1263 class NoIsomorphicSymmetryError(ValueError):
1264 """ Exception raised when no isomorphic symmetry can be found
1266 There would be isomorphic subgraphs for which symmetries can
1267 be found, but substructure_match is disabled
1272 """ Exception raised when too many symmetries are found.
1277 """ Exception raised when the ligand graph is disconnected.
1282 __all__ = (
'LigandScorer',
'ComputeSymmetries',
'NoSymmetryError',
1283 'NoIsomorphicSymmetryError',
'TooManySymmetriesError',
1284 'DisconnectedGraphError')
def __init__(self, model, target, model_ligands=None, target_ligands=None, resnum_alignments=False, rename_ligand_chain=False, substructure_match=False, coverage_delta=0.2, max_symmetries=1e5)
def _compute_scores(self)
def coverage_matrix(self)
def guess_target_ligand_unassigned_reason(self, trg_lig_idx)
def _compute(self, symmetries, target_ligand, model_ligand)
def unassigned_target_ligands(self)
def unassigned_model_ligands_reasons(self)
def _prepare_ligands(new_entity, old_entity, ligands, rename_chain)
def model_ligand_states(self)
def unassigned_model_ligands(self)
def _extract_ligands(entity)
def target_ligand_states(self)
def get_target_ligand_state_report(self, trg_lig_idx)
def get_model_ligand_state_report(self, mdl_lig_idx)
def _get_report(self, ligand_state, pair_states)
def guess_model_ligand_unassigned_reason(self, mdl_lig_idx)
def unassigned_target_ligands_reasons(self)
def PushVerbosityLevel(value)
def ComputeSymmetries(model_ligand, target_ligand, substructure_match=False, by_atom_index=False, return_symmetries=True, max_symmetries=1e6, model_graph=None, target_graph=None)