4 import numpy.ma
as np_ma
9 from ost
import LogError, LogWarning, LogScript, LogInfo, LogVerbose, LogDebug
14 """ Scorer class to compute various small molecule ligand (non polymer) scores.
19 - Python modules `numpy` and `networkx` must be available
20 (e.g. use ``pip install numpy networkx``)
22 At the moment, two scores are available:
24 * lDDT-PLI, that looks at the conservation of protein-ligand contacts
25 with :class:`lDDT <ost.mol.alg.lddt.lDDTScorer>`.
26 * Binding-site superposed, symmetry-corrected RMSD that assesses the
27 accuracy of the ligand pose (BiSyRMSD, hereinafter referred to as RMSD).
29 Both scores involve local chain mapping of the reference binding site
30 onto the model, symmetry-correction, and finally assignment (mapping)
31 of model and target ligands, as described in (Manuscript in preparation).
33 The binding site is defined based on a radius around the target ligand
34 in the reference structure only. It only contains protein and nucleic
35 acid chains that pass the criteria for the
36 :class:`chain mapping <ost.mol.alg.chain_mapping>`. This means ignoring
37 other ligands, waters, short polymers as well as any incorrectly connected
38 chains that may be in proximity.
40 Results are available as matrices (`(lddt_pli|rmsd)_matrix`), where every
41 target-model score is reported in a matrix; as `(lddt_pli|rmsd)` where
42 a model-target assignment has been determined (see below) and reported in
43 a dictionary; and as (`(lddt_pli|rmsd)_details`) methods, which report
44 additional details about different aspects of the scoring such as chain
47 The behavior of chain mapping and ligand assignment can be controlled
48 with the `global_chain_mapping` and `rmsd_assignment` arguments.
50 By default, chain mapping is performed locally, ie. only within the
51 binding site. As a result, different ligand scores can correspond to
52 different chain mappings. This tends to produce more favorable scores,
53 especially in large, partially regular oligomeric complexes.
54 Setting `global_chain_mapping=True` enforces a single global chain mapping,
55 as per :meth:`ost.mol.alg.chain_mapping.ChainMapper.GetMapping`.
56 Note that this global chain mapping currently ignores non polymer entities
57 such as small ligands, and may result in overly pessimistic scores.
59 By default, target-model ligand assignments are computed independently
60 for the RMSD and lDDT-PLI scores. For RMSD, each model ligand is uniquely
61 assigned to a target ligand, starting from the "best" possible mapping
62 (lowest RMSD) and using each target and model ligand in a single
63 assignment. Ties are resolved by best (highest) lDDT-PLI. Similarly,
64 for lDDT-PLI, the assignment is based on the highest lDDT-PLI, and ties
65 broken by lowest RMSD. Setting `rmsd_assignment=True` forces a single
66 ligand assignment, based on RMSD only. Ties are broken arbitrarily.
68 By default, only exact matches between target and model ligands are
69 considered. This is a problem when the target only contains a subset
70 of the expected atoms (for instance if atoms are missing in an
71 experimental structure, which often happens in the PDB). With
72 `substructure_match=True`, complete model ligands can be scored against
73 partial target ligands. One problem with this approach is that it is
74 very easy to find good matches to small, irrelevant ligands like EDO, CO2
75 or GOL. To counter that, the assignment algorithm considers the coverage,
76 expressed as the fraction of atoms of the model ligand atoms covered in the
77 target. Higher coverage matches are prioritized, but a match with a better
78 score will be preferred if it falls within a window of `coverage_delta`
79 (by default 0.2) of a worse-scoring match. As a result, for instance,
80 with a delta of 0.2, a low-score match with coverage 0.96 would be
81 preferred to a high-score match with coverage 0.90.
85 The class generally assumes that the
86 :attr:`~ost.mol.ResidueHandle.is_ligand` property is properly set on all
87 the ligand atoms, and only ligand atoms. This is typically the case for
88 entities loaded from mmCIF (tested with mmCIF files from the PDB and
89 SWISS-MODEL). Legacy PDB files must contain `HET` headers (which is usually
90 the case for files downloaded from the PDB but not elsewhere).
92 The class doesn't perform any cleanup of the provided structures.
93 It is up to the caller to ensure that the data is clean and suitable for
94 scoring. :ref:`Molck <molck>` should be used with extra
95 care, as many of the options (such as `rm_non_std` or `map_nonstd_res`) can
96 cause ligands to be removed from the structure. If cleanup with Molck is
97 needed, ligands should be kept aside and passed separately. Non-ligand residues
98 should be valid compounds with atom names following the naming conventions
99 of the component dictionary. Non-standard residues are acceptable, and if
100 the model contains a standard residue at that position, only atoms with
101 matching names will be considered.
103 Unlike most of OpenStructure, this class does not assume that the ligands
104 (either in the model or the target) are part of the PDB component
105 dictionary. They may have arbitrary residue names. Residue names do not
106 have to match between the model and the target. Matching is based on
107 the calculation of isomorphisms which depend on the atom element name and
108 atom connectivity (bond order is ignored).
109 It is up to the caller to ensure that the connectivity of atoms is properly
110 set before passing any ligands to this class. Ligands with improper
111 connectivity will lead to bogus results.
113 Note, however, that atom names should be unique within a residue (ie two
114 distinct atoms cannot have the same atom name).
116 This only applies to the ligand. The rest of the model and target
117 structures (protein, nucleic acids) must still follow the usual rules and
118 contain only residues from the compound library.
120 Although it isn't a requirement, hydrogen atoms should be removed from the
121 structures. Here is an example code snippet that will perform a reasonable
122 cleanup. Keep in mind that this is most likely not going to work as
123 expected with entities loaded from PDB files, as the `is_ligand` flag is
124 probably not set properly.
126 Here is a snippet example of how to use this code::
128 from ost.mol.alg.ligand_scoring import LigandScorer
129 from ost.mol.alg import Molck, MolckSettings
132 # Structure model in PDB format, containing the receptor only
133 model = io.LoadPDB("path_to_model.pdb")
134 # Ligand model as SDF file
135 model_ligand = io.LoadEntity("path_to_ligand.sdf", format="sdf")
136 # Target loaded from mmCIF, containing the ligand
137 target = io.LoadMMCIF("path_to_target.cif")
139 # Cleanup a copy of the structures
140 cleaned_model = model.Copy()
141 cleaned_target = target.Copy()
142 molck_settings = MolckSettings(rm_unk_atoms=True,
146 rm_zero_occ_atoms=False,
148 map_nonstd_res=False,
150 Molck(cleaned_model, conop.GetDefaultLib(), molck_settings)
151 Molck(cleaned_target, conop.GetDefaultLib(), molck_settings)
153 # Setup scorer object and compute lDDT-PLI
154 model_ligands = [model_ligand.Select("ele != H")]
155 ls = LigandScorer(model=cleaned_model, target=cleaned_target, model_ligands=model_ligands)
156 print("lDDT-PLI:", ls.lddt_pli)
157 print("RMSD:", ls.rmsd)
159 :param model: Model structure - a deep copy is available as :attr:`model`.
160 No additional processing (ie. Molck), checks,
161 stereochemistry checks or sanitization is performed on the
162 input. Hydrogen atoms are kept.
163 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
164 :param target: Target structure - a deep copy is available as :attr:`target`.
165 No additional processing (ie. Molck), checks or sanitization
166 is performed on the input. Hydrogen atoms are kept.
167 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
168 :param model_ligands: Model ligands, as a list of
169 :class:`~ost.mol.ResidueHandle` belonging to the model
170 entity. Can be instantiated with either a :class:list of
171 :class:`~ost.mol.ResidueHandle`/:class:`ost.mol.ResidueView`
172 or of :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`.
173 If `None`, ligands will be extracted based on the
174 :attr:`~ost.mol.ResidueHandle.is_ligand` flag (this is
175 normally set properly in entities loaded from mmCIF).
176 :type model_ligands: :class:`list`
177 :param target_ligands: Target ligands, as a list of
178 :class:`~ost.mol.ResidueHandle` belonging to the target
179 entity. Can be instantiated either a :class:list of
180 :class:`~ost.mol.ResidueHandle`/:class:`ost.mol.ResidueView`
181 or of :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
182 containing a single residue each.
183 If `None`, ligands will be extracted based on the
184 :attr:`~ost.mol.ResidueHandle.is_ligand` flag (this is
185 normally set properly in entities loaded from mmCIF).
186 :type target_ligands: :class:`list`
187 :param resnum_alignments: Whether alignments between chemically equivalent
188 chains in *model* and *target* can be computed
189 based on residue numbers. This can be assumed in
190 benchmarking setups such as CAMEO/CASP.
191 :type resnum_alignments: :class:`bool`
192 :param check_resnames: On by default. Enforces residue name matches
193 between mapped model and target residues.
194 :type check_resnames: :class:`bool`
195 :param rename_ligand_chain: If a residue with the same chain name and
196 residue number than an explicitly passed model
197 or target ligand exits in the structure,
198 and `rename_ligand_chain` is False, a
199 RuntimeError will be raised. If
200 `rename_ligand_chain` is True, the ligand will
201 be moved to a new chain instead, and the move
202 will be logged to the console with SCRIPT
204 :type rename_ligand_chain: :class:`bool`
205 :param chain_mapper: a chain mapper initialized for the target structure.
206 If None (default), a chain mapper will be initialized
208 :type chain_mapper: :class:`ost.mol.alg.chain_mapping.ChainMapper`
209 :param substructure_match: Set this to True to allow partial target ligand.
210 :type substructure_match: :class:`bool`
211 :param coverage_delta: the coverage delta for partial ligand assignment.
212 :type coverage_delta: :class:`float`
213 :param radius: Inclusion radius for the binding site. Residues with
214 atoms within this distance of the ligand will be considered
215 for inclusion in the binding site.
216 :type radius: :class:`float`
217 :param lddt_pli_radius: lDDT inclusion radius for lDDT-PLI.
218 :type lddt_pli_radius: :class:`float`
219 :param lddt_lp_radius: lDDT inclusion radius for lDDT-LP.
220 :type lddt_lp_radius: :class:`float`
221 :param binding_sites_topn: maximum number of target binding site
222 representations to assess, per target ligand.
223 Ignored if `global_chain_mapping` is True.
224 :type binding_sites_topn: :class:`int`
225 :param global_chain_mapping: set to True to use a global chain mapping for
226 the polymer (protein, nucleotide) chains.
227 Defaults to False, in which case only local
228 chain mappings are allowed (where different
229 ligand may be scored against different chain
231 :type global_chain_mapping: :class:`bool`
232 :param custom_mapping: Provide custom chain mapping between *model* and
233 *target* that is used as global chain mapping.
234 Dictionary with target chain names as key and model
235 chain names as value. Only has an effect if
236 *global_chain_mapping* is True.
237 :type custom_mapping: :class:`dict`
238 :param rmsd_assignment: assign ligands based on RMSD only. The default
239 (False) is to use a combination of lDDT-PLI and
240 RMSD for the assignment.
241 :type rmsd_assignment: :class:`bool`
242 :param n_max_naive: Parameter for global chain mapping. If *model* and
243 *target* have less or equal that number of chains,
245 mapping solution space is enumerated to find the
246 the optimum. A heuristic is used otherwise.
247 :type n_max_naive: :class:`int`
248 :param max_symmetries: If more than that many isomorphisms exist for
249 a target-ligand pair, it will be ignored and reported
251 :type max_symmetries: :class:`int`
252 :param unassigned: If True, unassigned model ligands are reported in
253 the output together with assigned ligands, with a score
254 of None, and reason for not being assigned in the
255 \\*_details matrix. Defaults to False.
256 :type unassigned: :class:`bool`
258 def __init__(self, model, target, model_ligands=None, target_ligands=None,
259 resnum_alignments=
False, check_resnames=
True,
260 rename_ligand_chain=
False,
261 chain_mapper=
None, substructure_match=
False,
263 radius=4.0, lddt_pli_radius=6.0, lddt_lp_radius=10.0,
264 binding_sites_topn=100000, global_chain_mapping=
False,
265 rmsd_assignment=
False, n_max_naive=12, max_symmetries=1e5,
266 custom_mapping=
None, unassigned=
False):
269 self.
model = mol.CreateEntityFromView(model,
False)
271 self.
model = model.Copy()
273 raise RuntimeError(
"model must be of type EntityView/EntityHandle")
276 self.
target = mol.CreateEntityFromView(target,
False)
278 self.
target = target.Copy()
280 raise RuntimeError(
"target must be of type EntityView/EntityHandle")
283 if target_ligands
is None:
290 LogWarning(
"No ligands in the target")
293 if model_ligands
is None:
300 LogWarning(
"No ligands in the model")
302 raise ValueError(
"No ligand in the model and in the target")
352 if custom_mapping
is not None:
357 """ Chain mapper object for the given :attr:`target`.
359 :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
368 def _model_mapping(self):
369 """Get the global chain mapping for the model."""
376 def _extract_ligands(entity):
377 """Extract ligands from entity. Return a list of residues.
379 Assumes that ligands have the :attr:`~ost.mol.ResidueHandle.is_ligand`
380 flag set. This is typically the case for entities loaded from mmCIF
381 (tested with mmCIF files from the PDB and SWISS-MODEL).
382 Legacy PDB files must contain `HET` headers (which is usually the
383 case for files downloaded from the PDB but not elsewhere).
385 This function performs basic checks to ensure that the residues in this
386 chain are not forming polymer bonds (ie peptide/nucleotide ligands) and
387 will raise a RuntimeError if this assumption is broken.
389 :param entity: the entity to extract ligands from
390 :type entity: :class:`~ost.mol.EntityHandle`
391 :rtype: :class:`list` of :class:`~ost.mol.ResidueHandle`
394 extracted_ligands = []
395 for residue
in entity.residues:
396 if residue.is_ligand:
397 if mol.InSequence(residue, residue.next):
398 raise RuntimeError(
"Residue %s connected in polymer sequen"
399 "ce %s" % (residue.qualified_name))
400 extracted_ligands.append(residue)
401 LogVerbose(
"Detected residue %s as ligand" % residue)
402 return extracted_ligands
405 def _prepare_ligands(new_entity, old_entity, ligands, rename_chain):
406 """Prepare the ligands given into a list of ResidueHandles which are
407 part of the copied entity, suitable for the model_ligands and
408 target_ligands properties.
410 This function takes a list of ligands as (Entity|Residue)(Handle|View).
411 Entities can contain multiple ligands, which will be considered as
414 Ligands which are part of the entity are simply fetched in the new
415 copied entity. Otherwise, they are copied over to the copied entity.
417 extracted_ligands = []
422 def _copy_residue(residue, rename_chain):
423 """ Copy the residue into the new chain.
424 Return the new residue handle."""
425 nonlocal next_chain_num, new_editor
428 if new_editor
is None:
429 new_editor = new_entity.EditXCS()
431 new_chain = new_entity.FindChain(residue.chain.name)
432 if not new_chain.IsValid():
433 new_chain = new_editor.InsertChain(residue.chain.name)
436 already_exists = new_chain.FindResidue(residue.number).IsValid()
441 new_chain_name = residue.chain.name +
"_" + str(chain_ext)
442 new_chain = new_entity.FindChain(new_chain_name)
443 if new_chain.IsValid():
447 new_chain = new_editor.InsertChain(new_chain_name)
449 LogScript(
"Moved ligand residue %s to new chain %s" % (
450 residue.qualified_name, new_chain.name))
452 msg =
"A residue number %s already exists in chain %s" % (
453 residue.number, residue.chain.name)
454 raise RuntimeError(msg)
457 new_res = new_editor.AppendResidue(new_chain, residue.name, residue.number)
459 for old_atom
in residue.atoms:
460 new_editor.InsertAtom(new_res, old_atom.name, old_atom.pos,
461 element=old_atom.element, occupancy=old_atom.occupancy,
462 b_factor=old_atom.b_factor, is_hetatm=old_atom.is_hetatom)
464 for old_atom
in residue.atoms:
465 for old_bond
in old_atom.bonds:
466 new_first = new_res.FindAtom(old_bond.first.name)
467 new_second = new_res.FindAtom(old_bond.second.name)
468 new_editor.Connect(new_first, new_second)
471 def _process_ligand_residue(res, rename_chain):
472 """Copy or fetch the residue. Return the residue handle."""
474 if res.entity.handle == old_entity.handle:
478 new_res = new_entity.FindResidue(res.chain.name, res.number)
479 if new_res
and new_res.valid:
480 LogVerbose(
"Ligand residue %s already in entity" % res.handle.qualified_name)
483 new_res = _copy_residue(res, rename_chain)
484 LogVerbose(
"Copied ligand residue %s" % res.handle.qualified_name)
485 new_res.SetIsLigand(
True)
488 for ligand
in ligands:
490 for residue
in ligand.residues:
491 new_residue = _process_ligand_residue(residue, rename_chain)
492 extracted_ligands.append(new_residue)
494 new_residue = _process_ligand_residue(ligand, rename_chain)
495 extracted_ligands.append(new_residue)
497 raise RuntimeError(
"Ligands should be given as Entity or Residue")
499 if new_editor
is not None:
500 new_editor.UpdateICS()
501 return extracted_ligands
503 def _get_binding_sites(self, ligand):
504 """Find representations of the binding site of *ligand* in the model.
506 Only consider protein and nucleic acid chains that pass the criteria
507 for the :class:`ost.mol.alg.chain_mapping`. This means ignoring other
508 ligands, waters, short polymers as well as any incorrectly connected
509 chain that may be in proximity.
511 :param ligand: Defines the binding site to identify.
512 :type ligand: :class:`~ost.mol.ResidueHandle`
517 ref_residues_hashes = set()
518 ignored_residue_hashes = {ligand.hash_code}
519 for ligand_at
in ligand.atoms:
520 close_atoms = self.target.FindWithin(ligand_at.GetPos(), self.
radius)
521 for close_at
in close_atoms:
523 ref_res = close_at.GetResidue()
524 h = ref_res.handle.GetHashCode()
525 if h
not in ref_residues_hashes
and \
526 h
not in ignored_residue_hashes:
527 if self.chain_mapper.target.ViewForHandle(ref_res).IsValid():
528 h = ref_res.handle.GetHashCode()
529 ref_residues_hashes.add(h)
530 elif ref_res.is_ligand:
531 LogWarning(
"Ignoring ligand %s in binding site of %s" % (
532 ref_res.qualified_name, ligand.qualified_name))
533 ignored_residue_hashes.add(h)
534 elif ref_res.chem_type == mol.ChemType.WATERS:
537 LogWarning(
"Ignoring residue %s in binding site of %s" % (
538 ref_res.qualified_name, ligand.qualified_name))
539 ignored_residue_hashes.add(h)
541 if ref_residues_hashes:
545 ref_bs = self.target.CreateEmptyView()
546 for ch
in self.target.chains:
547 for r
in ch.residues:
548 if r.handle.GetHashCode()
in ref_residues_hashes:
549 ref_bs.AddResidue(r, mol.ViewAddFlag.INCLUDE_ALL)
550 if len(ref_bs.residues) == 0:
551 raise RuntimeError(
"Failed to add proximity residues to "
552 "the reference binding site entity")
556 self.
_binding_sites[ligand.hash_code] = self.chain_mapper.GetRepr(
560 self.
_binding_sites[ligand.hash_code] = self.chain_mapper.GetRepr(
567 "model_representation",
568 "No representation of the reference binding site was "
569 "found in the model")
574 "No residue in proximity of the target ligand")
580 def _build_binding_site_entity(ligand, residues, extra_residues=[]):
581 """ Build an entity with all the binding site residues in chain A
582 and the ligand in chain _. Residues are renumbered consecutively from
583 1. The ligand is assigned residue number 1 and residue name LIG.
584 Residues in extra_residues not in `residues` in the model are added
585 at the end of chain A.
587 :param ligand: the Residue Handle of the ligand
588 :type ligand: :class:`~ost.mol.ResidueHandle`
589 :param residues: a list of binding site residues
590 :type residues: :class:`list` of :class:`~ost.mol.ResidueHandle`
591 :param extra_residues: an optional list with addition binding site
592 residues. Residues in this list which are not
593 in `residues` will be added at the end of chain
594 A. This allows for instance adding unmapped
595 residues missing from the model into the
596 reference binding site.
597 :type extra_residues: :class:`list` of :class:`~ost.mol.ResidueHandle`
598 :rtype: :class:`~ost.mol.EntityHandle`
600 bs_ent = mol.CreateEntity()
601 ed = bs_ent.EditXCS()
602 bs_chain = ed.InsertChain(
"A")
604 for resnum, old_res
in enumerate(residues, 1):
605 seen_res_qn.append(old_res.qualified_name)
606 new_res = ed.AppendResidue(bs_chain, old_res.handle,
608 ed.SetResidueNumber(new_res,
mol.ResNum(resnum))
611 for extra_res
in extra_residues:
612 if extra_res.qualified_name
not in seen_res_qn:
614 seen_res_qn.append(extra_res.qualified_name)
615 new_res = ed.AppendResidue(bs_chain,
618 ed.SetResidueNumber(new_res,
mol.ResNum(resnum))
620 ligand_chain = ed.InsertChain(
"_")
621 ligand_res = ed.AppendResidue(ligand_chain, ligand.handle,
623 ed.RenameResidue(ligand_res,
"LIG")
624 ed.SetResidueNumber(ligand_res,
mol.ResNum(1))
629 def _compute_scores(self):
631 Compute the RMSD and lDDT-PLI scores for every possible target-model
632 ligand pair and store the result in internal matrices.
635 rmsd_full_matrix = np.empty(
637 lddt_pli_full_matrix = np.empty(
645 LogVerbose(
"Analyzing target ligand %s" % target_ligand)
648 LogVerbose(
"Found binding site with chain mapping %s" % (binding_site.GetFlatChainMapping()))
651 target_ligand, binding_site.ref_residues,
652 binding_site.substructure.residues)
653 ref_bs_ent_ligand = ref_bs_ent.FindResidue(
"_", 1)
656 ref_bs_ent_ligand.name:
657 mol.alg.lddt.CustomCompound.FromResidue(
661 custom_compounds=custom_compounds,
666 symmetries = _ComputeSymmetries(
667 model_ligand, target_ligand,
671 LogVerbose(
"Ligands %s and %s symmetry match" % (
672 str(model_ligand), str(target_ligand)))
673 except NoSymmetryError:
675 LogVerbose(
"No symmetry between %s and %s" % (
676 str(model_ligand), str(target_ligand)))
679 except TooManySymmetriesError:
681 LogVerbose(
"Too many symmetries between %s and %s" % (
682 str(model_ligand), str(target_ligand)))
685 except DisconnectedGraphError:
688 substructure_match = len(symmetries[0][0]) != len(
690 coverage = len(symmetries[0][0]) / len(model_ligand.atoms)
694 rmsd = _SCRMSD_symmetries(symmetries, model_ligand,
695 target_ligand, transformation=binding_site.transform)
696 LogDebug(
"RMSD: %.4f" % rmsd)
699 if not rmsd_full_matrix[target_i, model_i]
or \
700 rmsd_full_matrix[target_i, model_i][
"rmsd"] > rmsd:
701 rmsd_full_matrix[target_i, model_i] = {
703 "lddt_lp": binding_site.lDDT,
704 "bs_ref_res": binding_site.substructure.residues,
705 "bs_ref_res_mapped": binding_site.ref_residues,
706 "bs_mdl_res_mapped": binding_site.mdl_residues,
707 "bb_rmsd": binding_site.bb_rmsd,
708 "target_ligand": target_ligand,
709 "model_ligand": model_ligand,
710 "chain_mapping": binding_site.GetFlatChainMapping(),
711 "transform": binding_site.transform,
712 "substructure_match": substructure_match,
713 "coverage": coverage,
714 "inconsistent_residues": binding_site.inconsistent_residues,
717 rmsd_full_matrix[target_i, model_i][
718 "unassigned"] =
False
719 LogDebug(
"Saved RMSD")
722 model_ligand, binding_site.mdl_residues, [])
723 mdl_bs_ent_ligand = mdl_bs_ent.FindResidue(
"_", 1)
727 mdl_editor = mdl_bs_ent.EditXCS()
728 for i, (trg_sym, mdl_sym)
in enumerate(symmetries):
730 for mdl_anum, trg_anum
in zip(mdl_sym, trg_sym):
732 trg_atom = ref_bs_ent_ligand.atoms[trg_anum]
733 mdl_atom = mdl_bs_ent_ligand.atoms[mdl_anum]
734 mdl_editor.RenameAtom(mdl_atom, trg_atom.name)
735 mdl_editor.UpdateICS()
737 global_lddt, local_lddt, lddt_tot, lddt_cons, n_res, \
738 n_cont, n_cons = lddt_scorer.lDDT(
739 mdl_bs_ent, chain_mapping={
"A":
"A",
"_":
"_"},
741 return_dist_test=
True,
743 LogDebug(
"lDDT-PLI for symmetry %d: %.4f" % (i, global_lddt))
746 if not lddt_pli_full_matrix[target_i, model_i]:
750 last_best_lddt = lddt_pli_full_matrix[
751 target_i, model_i][
"lddt_pli"]
752 last_best_rmsd = lddt_pli_full_matrix[
753 target_i, model_i][
"rmsd"]
754 if global_lddt > last_best_lddt:
757 elif global_lddt == last_best_lddt
and \
758 rmsd < last_best_rmsd:
764 lddt_pli_full_matrix[target_i, model_i] = {
765 "lddt_pli": global_lddt,
767 "lddt_lp": binding_site.lDDT,
768 "lddt_pli_n_contacts": lddt_tot,
769 "bs_ref_res": binding_site.substructure.residues,
770 "bs_ref_res_mapped": binding_site.ref_residues,
771 "bs_mdl_res_mapped": binding_site.mdl_residues,
772 "bb_rmsd": binding_site.bb_rmsd,
773 "target_ligand": target_ligand,
774 "model_ligand": model_ligand,
775 "chain_mapping": binding_site.GetFlatChainMapping(),
776 "transform": binding_site.transform,
777 "substructure_match": substructure_match,
778 "coverage": coverage,
779 "inconsistent_residues": binding_site.inconsistent_residues,
782 lddt_pli_full_matrix[target_i, model_i][
783 "unassigned"] =
False
784 LogDebug(
"Saved lDDT-PLI")
790 def _find_ligand_assignment(mat1, mat2=None, coverage=None, coverage_delta=None):
791 """ Find the ligand assignment based on mat1. If mat2 is provided, it
792 will be used to break ties in mat1. If mat2 is not provided, ties will
793 be resolved by taking the first match arbitrarily.
795 Both mat1 and mat2 should "look" like RMSD - ie be between inf (bad)
802 mat2[~np.isnan(mat2)] = np.inf
806 coverage = np.copy(mat1)
809 coverage = np.copy(coverage)
814 LogDebug(
"No model or target ligand, returning no assignment.")
817 def _get_best_match(mat1_val, coverage_val):
818 """ Extract the row/column indices of the prediction matching the
820 mat1_match_idx = np.argwhere((mat1 == mat1_val) & (coverage >= coverage_val))
822 if len(mat1_match_idx) > 1:
824 best_mat2_match = [mat2[tuple(x)]
for x
in mat1_match_idx]
827 best_mat2_idx = np.array(best_mat2_match).argmin()
829 return mat1_match_idx[best_mat2_idx]
831 return mat1_match_idx[0]
834 min_coverage = np.max(coverage)
835 while min_coverage > 0:
836 LogVerbose(
"Looking for matches with coverage >= %s" % min_coverage)
837 min_mat1 = LigandScorer._nanmin_nowarn(mat1, coverage < min_coverage)
838 while not np.isnan(min_mat1):
839 max_i_trg, max_i_mdl = _get_best_match(min_mat1, min_coverage)
843 alternative_matches = (mat1[:, max_i_mdl] < min_mat1) & (
844 coverage[:, max_i_mdl] > (min_coverage - coverage_delta))
845 if np.any(alternative_matches):
847 LogVerbose(
"Found match with lower coverage but better score")
848 min_mat1 = np.nanmin(mat1[alternative_matches])
849 max_i_trg, max_i_mdl = _get_best_match(min_mat1, min_coverage - coverage_delta)
852 mat1[max_i_trg, :] = np.nan
853 mat1[:, max_i_mdl] = np.nan
854 mat2[max_i_trg, :] = np.nan
855 mat2[:, max_i_mdl] = np.nan
856 coverage[max_i_trg, :] = -np.inf
857 coverage[:, max_i_mdl] = -np.inf
860 assignments.append((max_i_trg, max_i_mdl))
863 min_mat1 = LigandScorer._nanmin_nowarn(mat1, coverage < min_coverage)
865 min_coverage = np.max(coverage)
869 def _nanmin_nowarn(array, mask):
870 """Compute np.nanmin but ignore the RuntimeWarning."""
871 masked_array = np_ma.masked_array(array, mask=mask)
872 with warnings.catch_warnings():
873 warnings.simplefilter(
"ignore")
874 min = np.nanmin(masked_array, )
875 if np_ma.is_masked(min):
881 def _reverse_lddt(lddt):
882 """Reverse lDDT means turning it from a number between 0 and 1 to a
883 number between infinity and 0 (0 being better).
885 In practice, this is 1/lDDT. If lDDT is 0, the result is infinity.
887 with warnings.catch_warnings():
888 warnings.simplefilter(
"ignore")
889 return np.float64(1) / lddt
891 def _assign_ligands_rmsd(self):
892 """Assign (map) ligands between model and target.
894 Sets self._rmsd and self._rmsd_details.
902 self.
_rmsd = mat_tuple[0]
909 def _assign_matrices(self, mat1, mat2, data, main_key):
911 Perform the ligand assignment, ie find the mapping between model and
914 The algorithm starts by assigning the "best" mapping, and then discards
915 the target and model ligands (row, column) so that every model ligand
916 can be assigned to a single target ligand, and every target ligand
917 is only assigned to a single model ligand. Repeat until there is
918 nothing left to assign.
920 In case of a tie in values in `mat1`, it uses `mat2` to break the tie.
922 This algorithm doesn't guarantee a globally optimal assignment.
924 Both `mat1` and `mat2` should contain values between 0 and infinity,
925 with lower values representing better scores. Use the
926 :meth:`_reverse_lddt` method to convert lDDT values to such a score.
928 :param mat1: the main ligand assignment criteria (RMSD or lDDT-PLI)
929 :param mat2: the secondary ligand assignment criteria (lDDT-PLI or RMSD)
930 :param data: the data (either self._rmsd_full_matrix or self._lddt_pli_matrix)
931 :param main_key: the key of data (dictionnaries within `data`) to
932 assign into out_main.
933 :return: a tuple with 2 dictionaries of matrices containing the main
934 data, and details, respectively.
943 for assignment
in assignments:
944 trg_idx, mdl_idx = assignment
945 assigned_mdl[mdl_idx] =
True
946 assigned_trg[trg_idx] =
True
948 mdl_cname = mdl_lig.chain.name
949 mdl_resnum = mdl_lig.number
950 if mdl_cname
not in out_main:
951 out_main[mdl_cname] = {}
952 out_details[mdl_cname] = {}
953 out_main[mdl_cname][mdl_resnum] = data[
954 trg_idx, mdl_idx][main_key]
955 out_details[mdl_cname][mdl_resnum] = data[
959 assigned_trg, assigned_mdl, [out_main], [out_details], [main_key])
960 return out_main, out_details, unassigned_trg, unassigned_mdl
962 def _assign_unassigned(self, assigned_trg, assigned_mdl,
963 out_main, out_details, main_key):
967 unassigned_trg_idx = [i
for i, x
in enumerate(assigned_trg)
if not x]
968 unassigned_mdl_idx = [i
for i, x
in enumerate(assigned_mdl)
if not x]
970 for mdl_idx
in unassigned_mdl_idx:
973 mdl_cname = mdl_lig.chain.name
974 mdl_resnum = mdl_lig.number
975 if mdl_cname
not in unassigned_mdl:
976 unassigned_mdl[mdl_cname] = {}
977 unassigned_mdl[mdl_cname][mdl_resnum] = reason
979 for i, _
in enumerate(out_main):
980 if mdl_cname
not in out_main[i]:
981 out_main[i][mdl_cname] = {}
982 out_details[i][mdl_cname] = {}
983 out_main[i][mdl_cname][mdl_resnum] =
None
984 out_details[i][mdl_cname][mdl_resnum] = {
986 "reason_short": reason[0],
987 "reason_long": reason[1],
990 LogInfo(
"Model ligand %s is unassigned: %s" % (
991 mdl_lig.qualified_name, reason[1]))
993 for trg_idx
in unassigned_trg_idx:
996 trg_cname = trg_lig.chain.name
997 trg_resnum = trg_lig.number
998 if trg_cname
not in unassigned_trg:
999 unassigned_trg[trg_cname] = {}
1000 unassigned_trg[trg_cname][trg_resnum] = reason
1001 LogInfo(
"Target ligand %s is unassigned: %s" % (
1002 trg_lig.qualified_name, reason[1]))
1004 return unassigned_trg, unassigned_mdl
1007 def _assign_matrix(self, mat, data1, main_key1, data2, main_key2):
1009 Perform the ligand assignment, ie find the mapping between model and
1010 target ligands, based on a single matrix
1012 The algorithm starts by assigning the "best" mapping, and then discards
1013 the target and model ligands (row, column) so that every model ligand
1014 can be assigned to a single target ligand, and every target ligand
1015 is only assigned to a single model ligand. Repeat until there is
1016 nothing left to assign.
1018 This algorithm doesn't guarantee a globally optimal assignment.
1020 `mat` should contain values between 0 and infinity,
1021 with lower values representing better scores. Use the
1022 :meth:`_reverse_lddt` method to convert lDDT values to such a score.
1024 :param mat: the ligand assignment criteria (RMSD or lDDT-PLI)
1025 :param data1: the first data (either self._rmsd_full_matrix or self._lddt_pli_matrix)
1026 :param main_key1: the first key of data (dictionnaries within `data`) to
1027 assign into out_main.
1028 :param data2: the second data (either self._rmsd_full_matrix or self._lddt_pli_matrix)
1029 :param main_key2: the second key of data (dictionnaries within `data`) to
1030 assign into out_main.
1031 :return: a tuple with 4 dictionaries of matrices containing the main
1032 data1, details1, main data2 and details2, respectively.
1043 for assignment
in assignments:
1044 trg_idx, mdl_idx = assignment
1045 assigned_mdl[mdl_idx] =
True
1046 assigned_trg[trg_idx] =
True
1048 mdl_cname = mdl_lig.chain.name
1049 mdl_resnum = mdl_lig.number
1051 if mdl_cname
not in out_main1:
1052 out_main1[mdl_cname] = {}
1053 out_details1[mdl_cname] = {}
1054 out_main1[mdl_cname][mdl_resnum] = data1[
1055 trg_idx, mdl_idx][main_key1]
1056 out_details1[mdl_cname][mdl_resnum] = data1[
1059 if mdl_cname
not in out_main2:
1060 out_main2[mdl_cname] = {}
1061 out_details2[mdl_cname] = {}
1062 out_main2[mdl_cname][mdl_resnum] = data2[
1063 trg_idx, mdl_idx][main_key2]
1064 out_details2[mdl_cname][mdl_resnum] = data2[
1068 assigned_trg, assigned_mdl,
1069 [out_main1, out_main2], [out_details1, out_details2],
1070 [main_key1, main_key2])
1072 return out_main1, out_details1, out_main2, out_details2, \
1073 unassigned_trg, unassigned_mdl
1075 def _assign_ligands_lddt_pli(self):
1076 """ Assign ligands based on lDDT-PLI.
1078 Sets self._lddt_pli and self._lddt_pli_details.
1091 def _assign_ligands_rmsd_only(self):
1092 """Assign (map) ligands between model and target based on RMSD only.
1094 Sets self._rmsd, self._rmsd_details, self._lddt_pli and
1095 self._lddt_pli_details.
1102 self.
_rmsd = mat_tuple[0]
1111 """ Get the matrix of RMSD values.
1113 Target ligands are in rows, model ligands in columns.
1115 NaN values indicate that no RMSD could be computed (i.e. different
1118 :rtype: :class:`~numpy.ndarray`
1124 shape = self._rmsd_full_matrix.shape
1126 for i, j
in np.ndindex(shape):
1134 """ Get the matrix of lDDT-PLI values.
1136 Target ligands are in rows, model ligands in columns.
1138 NaN values indicate that no lDDT-PLI could be computed (i.e. different
1141 :rtype: :class:`~numpy.ndarray`
1147 shape = self._lddt_pli_full_matrix.shape
1149 for i, j
in np.ndindex(shape):
1157 """ Get the matrix of model ligand atom coverage in the target.
1159 Target ligands are in rows, model ligands in columns.
1161 A value of 0 indicates that there was no isomorphism between the model
1162 and target ligands. If `substructure_match=False`, only full match
1163 isomorphisms are considered, and therefore only values of 1.0 and 0.0
1166 :rtype: :class:`~numpy.ndarray`
1174 """Get a dictionary of RMSD score values, keyed by model ligand
1175 (chain name, :class:`~ost.mol.ResNum`).
1177 If the scoring object was instantiated with `unassigned=True`, some
1178 scores may be `None`.
1180 :rtype: :class:`dict`
1182 if self.
_rmsd is None:
1191 """Get a dictionary of RMSD score details (dictionaries), keyed by
1192 model ligand (chain name, :class:`~ost.mol.ResNum`).
1194 The value is a dictionary. For ligands that were assigned (mapped) to
1195 the target, the dictionary contain the following information:
1197 * `rmsd`: the RMSD score value.
1198 * `lddt_lp`: the lDDT score of the ligand pocket (lDDT-LP).
1199 * `bs_ref_res`: a list of residues (:class:`~ost.mol.ResidueHandle`)
1200 that define the binding site in the reference.
1201 * `bs_ref_res_mapped`: a list of residues
1202 (:class:`~ost.mol.ResidueHandle`) in the reference binding site
1203 that could be mapped to the model.
1204 * `bs_mdl_res_mapped`: a list of residues
1205 (:class:`~ost.mol.ResidueHandle`) in the model that were mapped to
1206 the reference binding site. The residues are in the same order as
1207 `bs_ref_res_mapped`.
1208 * `bb_rmsd`: the RMSD of the binding site backbone after superposition
1209 * `target_ligand`: residue handle of the target ligand.
1210 * `model_ligand`: residue handle of the model ligand.
1211 * `chain_mapping`: local chain mapping as a dictionary, with target
1212 chain name as key and model chain name as value.
1213 * `transform`: transformation to superpose the model onto the target.
1214 * `substructure_match`: whether the score is the result of a partial
1215 (substructure) match. A value of `True` indicates that the target
1216 ligand covers only part of the model, while `False` indicates a
1218 * `coverage`: the fraction of model atoms covered by the assigned
1219 target ligand, in the interval (0, 1]. If `substructure_match`
1220 is `False`, this will always be 1.
1221 * `inconsistent_residues`: a list of tuples of mapped residues views
1222 (:class:`~ost.mol.ResidueView`) with residue names that differ
1223 between the reference and the model, respectively.
1224 The list is empty if all residue names match, which is guaranteed
1225 if `check_resnames=True`.
1226 Note: more binding site mappings may be explored during scoring,
1227 but only inconsistencies in the selected mapping are reported.
1228 * `unassigned`: only if the scorer was instantiated with
1229 `unassigned=True`: `False`
1231 If the scoring object was instantiated with `unassigned=True`, in
1232 addition the unassigned ligands will be reported with a score of `None`
1233 and the following information:
1235 * `unassigned`: `True`,
1236 * `reason_short`: a short token of the reason, see
1237 :attr:`unassigned_model_ligands` for details and meaning.
1238 * `reason_long`: a human-readable text of the reason, see
1239 :attr:`unassigned_model_ligands` for details and meaning.
1242 :rtype: :class:`dict`
1253 """Get a dictionary of lDDT-PLI score values, keyed by model ligand
1254 (chain name, :class:`~ost.mol.ResNum`).
1256 If the scoring object was instantiated with `unassigned=True`, some
1257 scores may be `None`.
1259 :rtype: :class:`dict`
1270 """Get a dictionary of lDDT-PLI score details (dictionaries), keyed by
1271 model ligand (chain name, :class:`~ost.mol.ResNum`).
1273 Each sub-dictionary contains the following information:
1275 * `lddt_pli`: the lDDT-PLI score value.
1276 * `rmsd`: the RMSD score value corresponding to the lDDT-PLI
1277 chain mapping and assignment. This may differ from the RMSD-based
1278 assignment. Note that a different isomorphism than `lddt_pli` may
1280 * `lddt_lp`: the lDDT score of the ligand pocket (lDDT-LP).
1281 * `lddt_pli_n_contacts`: number of total contacts used in lDDT-PLI,
1282 summed over all thresholds. Can be divided by 8 to obtain the number
1284 * `bs_ref_res`: a list of residues (:class:`~ost.mol.ResidueHandle`)
1285 that define the binding site in the reference.
1286 * `bs_ref_res_mapped`: a list of residues
1287 (:class:`~ost.mol.ResidueHandle`) in the reference binding site
1288 that could be mapped to the model.
1289 * `bs_mdl_res_mapped`: a list of residues
1290 (:class:`~ost.mol.ResidueHandle`) in the model that were mapped to
1291 the reference binding site. The residues are in the same order as
1292 `bs_ref_res_mapped`.
1293 * `bb_rmsd`: the RMSD of the binding site backbone after superposition.
1294 Note: not used for lDDT-PLI computation.
1295 * `target_ligand`: residue handle of the target ligand.
1296 * `model_ligand`: residue handle of the model ligand.
1297 * `chain_mapping`: local chain mapping as a dictionary, with target
1298 chain name as key and model chain name as value.
1299 * `transform`: transformation to superpose the model onto the target
1301 * `substructure_match`: whether the score is the result of a partial
1302 (substructure) match. A value of `True` indicates that the target
1303 ligand covers only part of the model, while `False` indicates a
1305 * `inconsistent_residues`: a list of tuples of mapped residues views
1306 (:class:`~ost.mol.ResidueView`) with residue names that differ
1307 between the reference and the model, respectively.
1308 The list is empty if all residue names match, which is guaranteed
1309 if `check_resnames=True`.
1310 Note: more binding site mappings may be explored during scoring,
1311 but only inconsistencies in the selected mapping are reported.
1312 * `unassigned`: only if the scorer was instantiated with
1313 `unassigned=True`: `False`
1315 If the scoring object was instantiated with `unassigned=True`, in
1316 addition the unmapped ligands will be reported with a score of `None`
1317 and the following information:
1319 * `unassigned`: `True`,
1320 * `reason_short`: a short token of the reason, see
1321 :attr:`unassigned_model_ligands` for details and meaning.
1322 * `reason_long`: a human-readable text of the reason, see
1323 :attr:`unassigned_model_ligands` for details and meaning.
1324 * `lddt_pli`: `None`
1326 :rtype: :class:`dict`
1337 """Get a dictionary of target ligands not assigned to any model ligand,
1338 keyed by target ligand (chain name, :class:`~ost.mol.ResNum`).
1340 The assignment for the lDDT-PLI score is used (and is controlled
1341 by the `rmsd_assignment` argument).
1343 Each item contains a string from a controlled dictionary
1344 about the reason for the absence of assignment.
1345 A human-readable description can be obtained from the
1346 :attr:`unassigned_target_ligand_descriptions` property.
1348 Currently, the following reasons are reported:
1350 * `no_ligand`: there was no ligand in the model.
1351 * `disconnected`: the ligand graph was disconnected.
1352 * `binding_site`: no residues were in proximity of the ligand.
1353 * `model_representation`: no representation of the reference binding
1354 site was found in the model. (I.e. the binding site was not modeled.
1355 Remember: the binding site is defined in the target structure,
1356 the position of the model ligand itself is ignored at this point.)
1357 * `identity`: the ligand was not found in the model (by graph
1358 isomorphism). Check your ligand connectivity, and enable the
1359 `substructure_match` option if the target ligand is incomplete.
1360 * `stoichiometry`: there was a possible assignment in the model, but
1361 the model ligand was already assigned to a different target ligand.
1362 This indicates different stoichiometries.
1363 * `symmetries`: too many symmetries were found (by graph isomorphisms).
1364 Increase `max_symmetries`.
1366 Some of these reasons can be overlapping, but a single reason will be
1369 :rtype: :class:`dict`
1378 for cname, res
in self._unassigned_target_ligands.items():
1380 for resnum, val
in res.items():
1387 """Get a human-readable description of why target ligands were
1388 unassigned, as a dictionary keyed by the controlled dictionary
1389 from :attr:`unassigned_target_ligands`.
1397 """Get a dictionary of model ligands not assigned to any target ligand,
1398 keyed by model ligand (chain name, :class:`~ost.mol.ResNum`).
1400 The assignment for the lDDT-PLI score is used (and is controlled
1401 by the `rmsd_assignment` argument).
1403 Each item contains a string from a controlled dictionary
1404 about the reason for the absence of assignment.
1405 A human-readable description can be obtained from the
1406 :attr:`unassigned_model_ligand_descriptions` property.
1407 Currently, the following reasons are reported:
1409 * `no_ligand`: there was no ligand in the target.
1410 * `disconnected`: the ligand graph is disconnected.
1411 * `binding_site`: a potential assignment was found in the target, but
1412 there were no polymer residues in proximity of the ligand in the
1414 * `model_representation`: a potential assignment was found in the target,
1415 but no representation of the binding site was found in the model.
1416 (I.e. the binding site was not modeled. Remember: the binding site
1417 is defined in the target structure, the position of the model ligand
1418 itself is ignored at this point.)
1419 * `identity`: the ligand was not found in the target (by graph
1420 isomorphism). Check your ligand connectivity, and enable the
1421 `substructure_match` option if the target ligand is incomplete.
1422 * `stoichiometry`: there was a possible assignment in the target, but
1423 the model target was already assigned to a different model ligand.
1424 This indicates different stoichiometries.
1425 * `symmetries`: too many symmetries were found (by graph isomorphisms).
1426 Increase `max_symmetries`.
1428 Some of these reasons can be overlapping, but a single reason will be
1431 :rtype: :class:`dict`
1440 for cname, res
in self._unassigned_model_ligands.items():
1442 for resnum, val
in res.items():
1449 """Get a human-readable description of why model ligands were
1450 unassigned, as a dictionary keyed by the controlled dictionary
1451 from :attr:`unassigned_model_ligands`.
1458 def _set_custom_mapping(self, mapping):
1459 """ sets self.__model_mapping with a full blown MappingResult object
1461 :param mapping: mapping with trg chains as key and mdl ch as values
1462 :type mapping: :class:`dict`
1465 chem_mapping, chem_group_alns, mdl = \
1466 chain_mapper.GetChemMapping(self.
model)
1471 if len(mapping) != len(set(mapping.keys())):
1472 raise RuntimeError(f
"Expect unique trg chain names in mapping. Got "
1473 f
"{mapping.keys()}")
1474 if len(mapping) != len(set(mapping.values())):
1475 raise RuntimeError(f
"Expect unique mdl chain names in mapping. Got "
1476 f
"{mapping.values()}")
1478 trg_chains = set([ch.GetName()
for ch
in chain_mapper.target.chains])
1479 mdl_chains = set([ch.GetName()
for ch
in mdl.chains])
1480 for k,v
in mapping.items():
1481 if k
not in trg_chains:
1482 raise RuntimeError(f
"Target chain \"{k}\" is not available "
1483 f
"in target processed for chain mapping "
1485 if v
not in mdl_chains:
1486 raise RuntimeError(f
"Model chain \"{v}\" is not available "
1487 f
"in model processed for chain mapping "
1490 for trg_ch, mdl_ch
in mapping.items():
1491 trg_group_idx =
None
1492 mdl_group_idx =
None
1493 for idx, group
in enumerate(chain_mapper.chem_groups):
1497 for idx, group
in enumerate(chem_mapping):
1501 if trg_group_idx
is None or mdl_group_idx
is None:
1502 raise RuntimeError(
"Could not establish a valid chem grouping "
1503 "of chain names provided in custom mapping.")
1505 if trg_group_idx != mdl_group_idx:
1506 raise RuntimeError(f
"Chem group mismatch in custom mapping: "
1507 f
"target chain \"{trg_ch}\" groups with the "
1508 f
"following chemically equivalent target "
1510 f
"{chain_mapper.chem_groups[trg_group_idx]} "
1511 f
"but model chain \"{mdl_ch}\" maps to the "
1512 f
"following target chains: "
1513 f
"{chain_mapper.chem_groups[mdl_group_idx]}")
1515 pairs = set([(trg_ch, mdl_ch)
for trg_ch, mdl_ch
in mapping.items()])
1517 chain_mapping._GetRefMdlAlns(chain_mapper.chem_groups,
1518 chain_mapper.chem_group_alignments,
1524 final_mapping = list()
1525 for ref_chains
in chain_mapper.chem_groups:
1526 mapped_mdl_chains = list()
1527 for ref_ch
in ref_chains:
1528 if ref_ch
in mapping:
1529 mapped_mdl_chains.append(mapping[ref_ch])
1531 mapped_mdl_chains.append(
None)
1532 final_mapping.append(mapped_mdl_chains)
1535 for ref_group, mdl_group
in zip(chain_mapper.chem_groups,
1537 for ref_ch, mdl_ch
in zip(ref_group, mdl_group):
1538 if ref_ch
is not None and mdl_ch
is not None:
1539 aln = ref_mdl_alns[(ref_ch, mdl_ch)]
1540 trg_view = chain_mapper.target.Select(f
"cname={ref_ch}")
1541 mdl_view = mdl.Select(f
"cname={mdl_ch}")
1542 aln.AttachView(0, trg_view)
1543 aln.AttachView(1, mdl_view)
1544 alns[(ref_ch, mdl_ch)] = aln
1547 chain_mapper.chem_groups,
1549 final_mapping, alns)
1551 def _find_unassigned_model_ligand_reason(self, ligand, assignment="lddt_pli", check=True):
1554 ligand_idx = self.model_ligands.index(ligand)
1557 raise ValueError(
"Ligand %s is not in self.model_ligands" % ligand)
1561 details = getattr(self, assignment +
"_details")
1562 if ligand.chain.name
in details
and ligand.number
in details[ligand.chain.name]:
1563 ligand_details = details[ligand.chain.name][ligand.number]
1564 if not (
"unassigned" in ligand_details
and ligand_details[
"unassigned"]):
1565 raise RuntimeError(
"Ligand %s is mapped to %s" % (ligand, ligand_details[
"target_ligand"]))
1569 return (
"no_ligand",
"No ligand in the target")
1572 graph = _ResidueToGraph(ligand)
1573 if not networkx.is_connected(graph):
1574 return (
"disconnected",
"Ligand graph is disconnected")
1578 if np.isnan(assigned):
1585 return_symmetries=
False)
1586 except (NoSymmetryError, DisconnectedGraphError):
1588 except TooManySymmetriesError:
1596 assignment_matrix = getattr(self, assignment +
"_matrix")
1597 all_nan = np.all(np.isnan(assignment_matrix[:, ligand_idx]))
1605 return (
"stoichiometry",
1606 "Ligand was already assigned to an other "
1607 "model ligand (different stoichiometry)")
1608 elif assigned == -1:
1610 return (
"symmetries",
1611 "Too many symmetries were found.")
1615 iso =
"subgraph isomorphism"
1617 iso =
"full graph isomorphism"
1618 return (
"identity",
"Ligand was not found in the target (by %s)" % iso)
1620 def _find_unassigned_target_ligand_reason(self, ligand, assignment="lddt_pli", check=True):
1623 ligand_idx = self.target_ligands.index(ligand)
1626 raise ValueError(
"Ligand %s is not in self.target_ligands" % ligand)
1630 details = getattr(self, assignment +
"_details")
1631 for cname, chain_ligands
in details.items():
1632 for rnum, details
in chain_ligands.items():
1633 if "unassigned" in details
and details[
"unassigned"]:
1635 if details[
'target_ligand'] == ligand:
1636 raise RuntimeError(
"Ligand %s is mapped to %s.%s" % (
1637 ligand, cname, rnum))
1641 return (
"no_ligand",
"No ligand in the model")
1644 graph = _ResidueToGraph(ligand)
1645 if not networkx.is_connected(graph):
1646 return (
"disconnected",
"Ligand graph is disconnected")
1652 for model_lig_idx, assigned
in enumerate(
1654 if np.isnan(assigned):
1661 return_symmetries=
False)
1662 except (NoSymmetryError, DisconnectedGraphError):
1664 except TooManySymmetriesError:
1671 return (
"stoichiometry",
1672 "Ligand was already assigned to an other "
1673 "target ligand (different stoichiometry)")
1674 elif assigned == -1:
1676 return (
"symmetries",
1677 "Too many symmetries were found.")
1681 iso =
"subgraph isomorphism"
1683 iso =
"full graph isomorphism"
1684 return (
"identity",
"Ligand was not found in the model (by %s)" % iso)
1687 def _ResidueToGraph(residue, by_atom_index=False):
1688 """Return a NetworkX graph representation of the residue.
1690 :param residue: the residue from which to derive the graph
1691 :type residue: :class:`ost.mol.ResidueHandle` or
1692 :class:`ost.mol.ResidueView`
1693 :param by_atom_index: Set this parameter to True if you need the nodes to
1694 be labeled by atom index (within the residue).
1695 Otherwise, if False, the nodes will be labeled by
1697 :type by_atom_index: :class:`bool`
1698 :rtype: :class:`~networkx.classes.graph.Graph`
1700 Nodes are labeled with the Atom's uppercase :attr:`~ost.mol.AtomHandle.element`.
1702 nxg = networkx.Graph()
1704 for atom
in residue.atoms:
1705 nxg.add_node(atom.name, element=atom.element.upper())
1709 nxg.add_edges_from([(
1711 b.second.name)
for a
in residue.atoms
for b
in a.GetBondList()])
1714 nxg = networkx.relabel_nodes(nxg,
1715 {a: b
for a, b
in zip(
1716 [a.name
for a
in residue.atoms],
1717 range(len(residue.atoms)))},
1722 def SCRMSD(model_ligand, target_ligand, transformation=geom.Mat4(),
1723 substructure_match=
False, max_symmetries=1e6):
1724 """Calculate symmetry-corrected RMSD.
1726 Binding site superposition must be computed separately and passed as
1729 :param model_ligand: The model ligand
1730 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1731 :class:`ost.mol.ResidueView`
1732 :param target_ligand: The target ligand
1733 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1734 :class:`ost.mol.ResidueView`
1735 :param transformation: Optional transformation to apply on each atom
1736 position of model_ligand.
1737 :type transformation: :class:`ost.geom.Mat4`
1738 :param substructure_match: Set this to True to allow partial target
1740 :type substructure_match: :class:`bool`
1741 :param max_symmetries: If more than that many isomorphisms exist, raise
1742 a :class:`TooManySymmetriesError`. This can only be assessed by
1743 generating at least that many isomorphisms and can take some time.
1744 :type max_symmetries: :class:`int`
1745 :rtype: :class:`float`
1746 :raises: :class:`NoSymmetryError` when no symmetry can be found,
1747 :class:`DisconnectedGraphError` when ligand graph is disconnected,
1748 :class:`TooManySymmetriesError` when more than `max_symmetries`
1749 isomorphisms are found.
1752 symmetries = _ComputeSymmetries(model_ligand, target_ligand,
1753 substructure_match=substructure_match,
1755 max_symmetries=max_symmetries)
1756 return _SCRMSD_symmetries(symmetries, model_ligand, target_ligand,
1760 def _SCRMSD_symmetries(symmetries, model_ligand, target_ligand,
1762 """Compute SCRMSD with pre-computed symmetries. Internal. """
1765 for i, (trg_sym, mdl_sym)
in enumerate(symmetries):
1767 trg_lig_rmsd_ent = mol.CreateEntity()
1768 trg_lig_rmsd_editor = trg_lig_rmsd_ent.EditXCS()
1769 trg_lig_rmsd_chain = trg_lig_rmsd_editor.InsertChain(
"_")
1770 trg_lig_rmsd_res = trg_lig_rmsd_editor.AppendResidue(trg_lig_rmsd_chain,
"LIG")
1772 mdl_lig_rmsd_ent = mol.CreateEntity()
1773 mdl_lig_rmsd_editor = mdl_lig_rmsd_ent.EditXCS()
1774 mdl_lig_rmsd_chain = mdl_lig_rmsd_editor.InsertChain(
"_")
1775 mdl_lig_rmsd_res = mdl_lig_rmsd_editor.AppendResidue(mdl_lig_rmsd_chain,
"LIG")
1777 for mdl_anum, trg_anum
in zip(mdl_sym, trg_sym):
1779 trg_atom = target_ligand.atoms[trg_anum]
1780 mdl_atom = model_ligand.atoms[mdl_anum]
1782 trg_lig_rmsd_editor.InsertAtom(trg_lig_rmsd_res, trg_atom.name, trg_atom.pos)
1783 mdl_lig_rmsd_editor.InsertAtom(mdl_lig_rmsd_res, mdl_atom.name, mdl_atom.pos)
1785 trg_lig_rmsd_editor.UpdateICS()
1786 mdl_lig_rmsd_editor.UpdateICS()
1788 rmsd = mol.alg.CalculateRMSD(mdl_lig_rmsd_ent.CreateFullView(),
1789 trg_lig_rmsd_ent.CreateFullView(),
1791 if rmsd < best_rmsd:
1797 def _ComputeSymmetries(model_ligand, target_ligand, substructure_match=False,
1798 by_atom_index=
False, return_symmetries=
True,
1799 max_symmetries=1e6):
1800 """Return a list of symmetries (isomorphisms) of the model onto the target
1803 :param model_ligand: The model ligand
1804 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1805 :class:`ost.mol.ResidueView`
1806 :param target_ligand: The target ligand
1807 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1808 :class:`ost.mol.ResidueView`
1809 :param substructure_match: Set this to True to allow partial ligands
1811 :type substructure_match: :class:`bool`
1812 :param by_atom_index: Set this parameter to True if you need the symmetries
1813 to refer to atom index (within the residue).
1814 Otherwise, if False, the symmetries refer to atom
1816 :type by_atom_index: :class:`bool`
1817 :type return_symmetries: If Truthy, return the mappings, otherwise simply
1818 return True if a mapping is found (and raise if
1819 no mapping is found). This is useful to quickly
1820 find out if a mapping exist without the expensive
1821 step to find all the mappings.
1822 :type return_symmetries: :class:`bool`
1823 :param max_symmetries: If more than that many isomorphisms exist, raise
1824 a :class:`TooManySymmetriesError`. This can only be assessed by
1825 generating at least that many isomorphisms and can take some time.
1826 :type max_symmetries: :class:`int`
1827 :raises: :class:`NoSymmetryError` when no symmetry can be found;
1828 :class:`TooManySymmetriesError` when more than `max_symmetries`
1829 isomorphisms are found.
1834 model_graph = _ResidueToGraph(model_ligand, by_atom_index=by_atom_index)
1835 target_graph = _ResidueToGraph(target_ligand, by_atom_index=by_atom_index)
1837 if not networkx.is_connected(model_graph):
1839 if not networkx.is_connected(target_graph):
1846 gm = networkx.algorithms.isomorphism.GraphMatcher(
1847 model_graph, target_graph, node_match=
lambda x, y:
1848 x[
"element"] == y[
"element"])
1849 if gm.is_isomorphic():
1850 if not return_symmetries:
1853 for i, isomorphism
in enumerate(gm.isomorphisms_iter()):
1854 if i >= max_symmetries:
1856 "Too many symmetries between %s and %s" % (
1857 str(model_ligand), str(target_ligand)))
1858 symmetries.append((list(isomorphism.values()), list(isomorphism.keys())))
1859 assert len(symmetries) > 0
1860 LogDebug(
"Found %s isomorphic mappings (symmetries)" % len(symmetries))
1861 elif gm.subgraph_is_isomorphic()
and substructure_match:
1862 if not return_symmetries:
1865 for i, isomorphism
in enumerate(gm.subgraph_isomorphisms_iter()):
1866 if i >= max_symmetries:
1868 "Too many symmetries between %s and %s" % (
1869 str(model_ligand), str(target_ligand)))
1870 symmetries.append((list(isomorphism.values()), list(isomorphism.keys())))
1871 assert len(symmetries) > 0
1873 assert len(symmetries[0][0]) == len(target_ligand.atoms)
1874 LogDebug(
"Found %s subgraph isomorphisms (symmetries)" % len(symmetries))
1875 elif gm.subgraph_is_isomorphic():
1876 LogDebug(
"Found subgraph isomorphisms (symmetries), but"
1877 " ignoring because substructure_match=False")
1879 str(model_ligand), str(target_ligand)))
1881 LogDebug(
"Found no isomorphic mappings (symmetries)")
1883 str(model_ligand), str(target_ligand)))
1889 """Exception raised when no symmetry can be found.
1894 class TooManySymmetriesError(ValueError):
1895 """Exception raised when too many symmetries are found.
1900 """Exception raised when the ligand graph is disconnected.
1905 __all__ = [
"LigandScorer",
"SCRMSD",
"NoSymmetryError",
1906 "TooManySymmetriesError",
"DisconnectedGraphError"]
def unassigned_model_ligand_descriptions
def _assign_ligands_rmsd_only
_unassigned_target_ligand_short
def _find_unassigned_target_ligand_reason
def unassigned_model_ligands
def _build_binding_site_entity
_unassigned_target_ligands
_unassigned_target_ligands_reason
def _find_ligand_assignment
def _find_unassigned_model_ligand_reason
def unassigned_target_ligands
_unassigned_model_ligand_short
_unassigned_model_ligands
def unassigned_target_ligand_descriptions
_unassigned_model_ligand_descriptions
_unassigned_target_ligand_descriptions
_assignment_match_coverage
def _assign_ligands_lddt_pli