3 from ost
import LogWarning
10 """ :class:`LigandScorer` implementing symmetry corrected RMSD.
12 :class:`SCRMSDScorer` computes a score for a specific pair of target/model
15 The returned RMSD is based on a binding site superposition.
16 The binding site of the target structure is defined as all residues with at
17 least one atom within *bs_radius* around the target ligand.
18 It only contains protein and nucleic acid residues from chains that
19 pass the criteria for the
20 :class:`chain mapping <ost.mol.alg.chain_mapping>`. This means ignoring
21 other ligands, waters, short polymers as well as any incorrectly connected
22 chains that may be in proximity.
23 The respective model binding site for superposition is identified by
24 naively enumerating all possible mappings of model chains onto their
25 chemically equivalent target counterparts from the target binding site.
26 The *binding_sites_topn* with respect to lDDT score are evaluated and
28 You can either try to map ALL model chains onto the target binding site by
29 enabling *full_bs_search* or restrict the model chains for a specific
30 target/model ligand pair to the chains with at least one atom within
31 *model_bs_radius* around the model ligand. The latter can be significantly
32 faster in case of large complexes.
33 Symmetry correction is achieved by simply computing an RMSD value for
34 each symmetry, i.e. atom-atom assignments of the ligand as given by
35 :class:`LigandScorer`. The lowest RMSD value is returned
37 Populates :attr:`LigandScorer.aux_data` with following :class:`dict` keys:
40 * lddt_lp: lDDT of the binding site used for superposition
41 * bs_ref_res: :class:`list` of binding site residues in target
42 * bs_ref_res_mapped: :class:`list` of target binding site residues that
44 * bs_mdl_res_mapped: :class:`list` of same length with respective model
46 * bb_rmsd: Backbone RMSD (CA, C3' for nucleotides) for mapped residues
48 * target_ligand: The actual target ligand for which the score was computed
49 * model_ligand: The actual model ligand for which the score was computed
50 * chain_mapping: :class:`dict` with a chain mapping of chains involved in
51 binding site - key: trg chain name, value: mdl chain name
52 * transform: :class:`geom.Mat4` to transform model binding site onto target
54 * inconsistent_residues: :class:`list` of :class:`tuple` representing
55 residues with inconsistent residue names upon mapping (which is given by
56 bs_ref_res_mapped and bs_mdl_res_mapped). Tuples have two elements:
57 1) trg residue 2) mdl residue
59 :param model: Passed to parent constructor - see :class:`LigandScorer`.
60 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
61 :param target: Passed to parent constructor - see :class:`LigandScorer`.
62 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
63 :param model_ligands: Passed to parent constructor - see
64 :class:`LigandScorer`.
65 :type model_ligands: :class:`list`
66 :param target_ligands: Passed to parent constructor - see
67 :class:`LigandScorer`.
68 :type target_ligands: :class:`list`
69 :param resnum_alignments: Passed to parent constructor - see
70 :class:`LigandScorer`.
71 :type resnum_alignments: :class:`bool`
72 :param rename_ligand_chain: Passed to parent constructor - see
73 :class:`LigandScorer`.
74 :type rename_ligand_chain: :class:`bool`
75 :param substructure_match: Passed to parent constructor - see
76 :class:`LigandScorer`.
77 :type substructure_match: :class:`bool`
78 :param coverage_delta: Passed to parent constructor - see
79 :class:`LigandScorer`.
80 :type coverage_delta: :class:`float`
81 :param max_symmetries: Passed to parent constructor - see
82 :class:`LigandScorer`.
83 :type max_symmetries: :class:`int`
84 :param bs_radius: Inclusion radius for the binding site. Residues with
85 atoms within this distance of the ligand will be considered
86 for inclusion in the binding site.
87 :type bs_radius: :class:`float`
88 :param lddt_lp_radius: lDDT inclusion radius for lDDT-LP.
89 :type lddt_lp_radius: :class:`float`
90 :param model_bs_radius: inclusion radius for model binding sites.
91 Only used when full_bs_search=False, otherwise the
92 radius is effectively infinite. Only chains with
93 atoms within this distance of a model ligand will
94 be considered in the chain mapping.
95 :type model_bs_radius: :class:`float`
96 :param binding_sites_topn: maximum number of model binding site
97 representations to assess per target binding
99 :type binding_sites_topn: :class:`int`
100 :param full_bs_search: If True, all potential binding sites in the model
101 are searched for each target binding site. If False,
102 the search space in the model is reduced to chains
103 around (`model_bs_radius` Å) model ligands.
104 This speeds up computations, but may result in
105 ligands not being scored if the predicted ligand
106 pose is too far from the actual binding site.
107 :type full_bs_search: :class:`bool`
109 def __init__(self, model, target, model_ligands=None, target_ligands=None,
110 resnum_alignments=False, rename_ligand_chain=False,
111 substructure_match=False, coverage_delta=0.2,
112 max_symmetries=1e5, bs_radius=4.0, lddt_lp_radius=15.0,
113 model_bs_radius=25, binding_sites_topn=100000,
114 full_bs_search=False):
116 super().
__init__(model, target, model_ligands = model_ligands,
117 target_ligands = target_ligands,
118 resnum_alignments = resnum_alignments,
119 rename_ligand_chain = rename_ligand_chain,
120 substructure_match = substructure_match,
121 coverage_delta = coverage_delta,
122 max_symmetries = max_symmetries)
137 self.
_repr_repr = dict()
149 "No residues were in proximity of the "
151 self.
state_decodingstate_decoding[11] = (
"model_representation",
"No representation "
152 "of the reference binding site was found in "
153 "the model, i.e. the binding site was not "
154 "modeled or the model ligand was positioned "
155 "too far in combination with "
156 "full_bs_search=False.")
158 "Unknown error occured in SCRMSDScorer")
160 def _compute(self, symmetries, target_ligand, model_ligand):
161 """ Implements interface from parent
164 best_rmsd_result = {
"rmsd":
None,
166 "bs_ref_res": list(),
167 "bs_ref_res_mapped": list(),
168 "bs_mdl_res_mapped": list(),
170 "target_ligand": target_ligand,
171 "model_ligand": model_ligand,
172 "chain_mapping": dict(),
174 "inconsistent_residues": list()}
176 representations = self.
_get_repr_get_repr(target_ligand, model_ligand)
178 for r
in representations:
179 rmsd = _SCRMSD_symmetries(symmetries, model_ligand,
180 target_ligand, transformation=r.transform)
182 if best_rmsd_result[
"rmsd"]
is None or \
183 rmsd < best_rmsd_result[
"rmsd"]:
184 best_rmsd_result = {
"rmsd": rmsd,
186 "bs_ref_res": r.substructure.residues,
187 "bs_ref_res_mapped": r.ref_residues,
188 "bs_mdl_res_mapped": r.mdl_residues,
189 "bb_rmsd": r.bb_rmsd,
190 "target_ligand": target_ligand,
191 "model_ligand": model_ligand,
192 "chain_mapping": r.GetFlatChainMapping(),
193 "transform": r.transform,
194 "inconsistent_residues":
195 r.inconsistent_residues}
197 target_ligand_state = 0
198 model_ligand_state = 0
201 if best_rmsd_result[
"rmsd"]
is not None:
202 best_rmsd = best_rmsd_result[
"rmsd"]
210 target_ligand_state = 10
211 elif len(representations) == 0:
214 return (best_rmsd, pair_state, target_ligand_state, model_ligand_state,
217 def _score_dir(self):
218 """ Implements interface from parent
222 def _get_repr(self, target_ligand, model_ligand):
227 key = (target_ligand.handle.hash_code, 0)
229 key = (target_ligand.handle.hash_code,
230 model_ligand.handle.hash_code)
232 if key
not in self.
_repr_repr:
242 inclusion_radius=radius,
244 chem_mapping_result=repr_in)
245 self.
_repr_repr[key] = reprs
247 return self.
_repr_repr[key]
249 def _get_target_binding_site(self, target_ligand):
251 if target_ligand.handle.hash_code
not in self.
_binding_sites_binding_sites:
254 ref_residues_hashes = set()
255 ignored_residue_hashes = {target_ligand.hash_code}
256 for ligand_at
in target_ligand.atoms:
257 close_atoms = self.
targettarget.FindWithin(ligand_at.GetPos(),
259 for close_at
in close_atoms:
261 ref_res = close_at.GetResidue()
262 h = ref_res.handle.GetHashCode()
263 if h
not in ref_residues_hashes
and \
264 h
not in ignored_residue_hashes:
265 view = self.
_chain_mapper_chain_mapper.target.ViewForHandle(ref_res)
267 h = ref_res.handle.GetHashCode()
268 ref_residues_hashes.add(h)
269 elif ref_res.is_ligand:
270 msg = f
"Ignoring ligand {ref_res.qualified_name} "
271 msg +=
"in binding site of "
272 msg += str(target_ligand.qualified_name)
274 ignored_residue_hashes.add(h)
275 elif ref_res.chem_type == mol.ChemType.WATERS:
278 msg = f
"Ignoring residue {ref_res.qualified_name} "
279 msg +=
"in binding site of "
280 msg += str(target_ligand.qualified_name)
282 ignored_residue_hashes.add(h)
284 ref_bs = self.
targettarget.CreateEmptyView()
285 if ref_residues_hashes:
289 for ch
in self.
targettarget.chains:
290 for r
in ch.residues:
291 if r.handle.GetHashCode()
in ref_residues_hashes:
292 ref_bs.AddResidue(r, mol.ViewAddFlag.INCLUDE_ALL)
293 if len(ref_bs.residues) == 0:
294 raise RuntimeError(
"Failed to add proximity residues to "
295 "the reference binding site entity")
297 self.
_binding_sites_binding_sites[target_ligand.handle.hash_code] = ref_bs
299 return self.
_binding_sites_binding_sites[target_ligand.handle.hash_code]
302 def _chem_mapping(self):
310 def _chem_group_alns(self):
318 def _ref_mdl_alns(self):
321 chain_mapping._GetRefMdlAlns(self.
_chain_mapper_chain_mapper.chem_groups,
328 def _chain_mapping_mdl(self):
335 def _get_get_repr_input(self, mdl_ligand):
336 if mdl_ligand.handle.hash_code
not in self.
_get_repr_input_get_repr_input:
343 for at
in mdl_ligand.atoms:
346 for close_at
in close_atoms:
347 chains.add(close_at.GetChain().GetName())
353 query +=
','.join([mol.QueryQuoteName(x)
for x
in chains])
357 chem_mapping = list()
359 chem_mapping.append([x
for x
in m
if x
in chains])
374 def SCRMSD(model_ligand, target_ligand, transformation=geom.Mat4(),
375 substructure_match=
False, max_symmetries=1e6):
376 """Calculate symmetry-corrected RMSD.
378 Binding site superposition must be computed separately and passed as
381 :param model_ligand: The model ligand
382 :type model_ligand: :class:`ost.mol.ResidueHandle` or
383 :class:`ost.mol.ResidueView`
384 :param target_ligand: The target ligand
385 :type target_ligand: :class:`ost.mol.ResidueHandle` or
386 :class:`ost.mol.ResidueView`
387 :param transformation: Optional transformation to apply on each atom
388 position of model_ligand.
389 :type transformation: :class:`ost.geom.Mat4`
390 :param substructure_match: Set this to True to allow partial target
392 :type substructure_match: :class:`bool`
393 :param max_symmetries: If more than that many isomorphisms exist, raise
394 a :class:`TooManySymmetriesError`. This can only be assessed by
395 generating at least that many isomorphisms and can take some time.
396 :type max_symmetries: :class:`int`
397 :rtype: :class:`float`
398 :raises: :class:`ost.mol.alg.ligand_scoring_base.NoSymmetryError` when no
399 symmetry can be found,
400 :class:`ost.mol.alg.ligand_scoring_base.DisconnectedGraphError`
401 when ligand graph is disconnected,
402 :class:`ost.mol.alg.ligand_scoring_base.TooManySymmetriesError`
403 when more than *max_symmetries* isomorphisms are found.
406 symmetries = ligand_scoring_base.ComputeSymmetries(model_ligand,
408 substructure_match=substructure_match,
410 max_symmetries=max_symmetries)
411 return _SCRMSD_symmetries(symmetries, model_ligand, target_ligand,
415 def _SCRMSD_symmetries(symmetries, model_ligand, target_ligand,
417 """Compute SCRMSD with pre-computed symmetries. Internal. """
420 mdl_ligand_pos = np.ones((model_ligand.GetAtomCount(), 4))
421 for a_idx, a
in enumerate(model_ligand.atoms):
423 mdl_ligand_pos[a_idx, 0] = p[0]
424 mdl_ligand_pos[a_idx, 1] = p[1]
425 mdl_ligand_pos[a_idx, 2] = p[2]
426 np_transformation = np.zeros((4,4))
429 np_transformation[i,j] = transformation[i,j]
430 mdl_ligand_pos = mdl_ligand_pos.dot(np_transformation.T)[:,:3]
433 trg_ligand_pos = np.zeros((target_ligand.GetAtomCount(), 3))
434 for a_idx, a
in enumerate(target_ligand.atoms):
436 trg_ligand_pos[a_idx, 0] = p[0]
437 trg_ligand_pos[a_idx, 1] = p[1]
438 trg_ligand_pos[a_idx, 2] = p[2]
445 rmsd_mdl_pos = np.zeros((target_ligand.GetAtomCount(), 3))
446 rmsd_trg_pos = np.zeros((target_ligand.GetAtomCount(), 3))
450 for i, (trg_sym, mdl_sym)
in enumerate(symmetries):
451 for idx, (mdl_anum, trg_anum)
in enumerate(zip(mdl_sym, trg_sym)):
452 rmsd_mdl_pos[idx,:] = mdl_ligand_pos[mdl_anum, :]
453 rmsd_trg_pos[idx,:] = trg_ligand_pos[trg_anum, :]
454 rmsd = np.sqrt(((rmsd_mdl_pos - rmsd_trg_pos)**2).sum(-1).mean())
461 __all__ = (
'SCRMSDScorer',
'SCRMSD')
def _get_target_binding_site(self, target_ligand)
def _get_get_repr_input(self, mdl_ligand)
def _chem_group_alns(self)
def _chain_mapping_mdl(self)
def _get_repr(self, target_ligand, model_ligand)
def __init__(self, model, target, model_ligands=None, target_ligands=None, resnum_alignments=False, rename_ligand_chain=False, substructure_match=False, coverage_delta=0.2, max_symmetries=1e5, bs_radius=4.0, lddt_lp_radius=15.0, model_bs_radius=25, binding_sites_topn=100000, full_bs_search=False)
def SCRMSD(model_ligand, target_ligand, transformation=geom.Mat4(), substructure_match=False, max_symmetries=1e6)