3 from ost
import LogWarning, LogScript, LogInfo, LogVerbose
11 """ :class:`LigandScorer` implementing symmetry corrected RMSD (BiSyRMSD).
13 :class:`SCRMSDScorer` computes a score for a specific pair of target/model
16 The returned RMSD is based on a binding site superposition.
17 The binding site of the target structure is defined as all residues with at
18 least one atom within `bs_radius` around the target ligand.
19 It only contains protein and nucleic acid residues from chains that
20 pass the criteria for the
21 :class:`chain mapping <ost.mol.alg.chain_mapping>`. This means ignoring
22 other ligands, waters, short polymers as well as any incorrectly connected
23 chains that may be in proximity.
24 The respective model binding site for superposition is identified by
25 naively enumerating all possible mappings of model chains onto their
26 chemically equivalent target counterparts from the target binding site.
27 The `binding_sites_topn` with respect to lDDT score are evaluated and
29 You can either try to map ALL model chains onto the target binding site by
30 enabling `full_bs_search` or restrict the model chains for a specific
31 target/model ligand pair to the chains with at least one atom within
32 *model_bs_radius* around the model ligand. The latter can be significantly
33 faster in case of large complexes.
34 Symmetry correction is achieved by simply computing an RMSD value for
35 each symmetry, i.e. atom-atom assignments of the ligand as given by
36 :class:`LigandScorer`. The lowest RMSD value is returned.
38 Populates :attr:`LigandScorer.aux_data` with following :class:`dict` keys:
40 * rmsd: The BiSyRMSD score
41 * lddt_lp: lDDT of the binding pocket used for superposition (lDDT-LP)
42 * bs_ref_res: :class:`list` of binding site residues in target
43 * bs_ref_res_mapped: :class:`list` of target binding site residues that
45 * bs_mdl_res_mapped: :class:`list` of same length with respective model
47 * bb_rmsd: Backbone RMSD (CA, C3' for nucleotides; full backbone for
48 binding sites with fewer than 3 residues) for mapped binding site
49 residues after superposition
50 * target_ligand: The actual target ligand for which the score was computed
51 * model_ligand: The actual model ligand for which the score was computed
52 * chain_mapping: :class:`dict` with a chain mapping of chains involved in
53 binding site - key: trg chain name, value: mdl chain name
54 * transform: :class:`geom.Mat4` to transform model binding site onto target
56 * inconsistent_residues: :class:`list` of :class:`tuple` representing
57 residues with inconsistent residue names upon mapping (which is given by
58 bs_ref_res_mapped and bs_mdl_res_mapped). Tuples have two elements:
59 1) trg residue 2) mdl residue
61 :param model: Passed to parent constructor - see :class:`LigandScorer`.
62 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
63 :param target: Passed to parent constructor - see :class:`LigandScorer`.
64 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
65 :param model_ligands: Passed to parent constructor - see
66 :class:`LigandScorer`.
67 :type model_ligands: :class:`list`
68 :param target_ligands: Passed to parent constructor - see
69 :class:`LigandScorer`.
70 :type target_ligands: :class:`list`
71 :param resnum_alignments: Passed to parent constructor - see
72 :class:`LigandScorer`.
73 :type resnum_alignments: :class:`bool`
74 :param rename_ligand_chain: Passed to parent constructor - see
75 :class:`LigandScorer`.
76 :type rename_ligand_chain: :class:`bool`
77 :param substructure_match: Passed to parent constructor - see
78 :class:`LigandScorer`.
79 :type substructure_match: :class:`bool`
80 :param coverage_delta: Passed to parent constructor - see
81 :class:`LigandScorer`.
82 :type coverage_delta: :class:`float`
83 :param max_symmetries: Passed to parent constructor - see
84 :class:`LigandScorer`.
85 :type max_symmetries: :class:`int`
86 :param bs_radius: Inclusion radius for the binding site. Residues with
87 atoms within this distance of the ligand will be considered
88 for inclusion in the binding site.
89 :type bs_radius: :class:`float`
90 :param lddt_lp_radius: lDDT inclusion radius for lDDT-LP.
91 :type lddt_lp_radius: :class:`float`
92 :param model_bs_radius: inclusion radius for model binding sites.
93 Only used when full_bs_search=False, otherwise the
94 radius is effectively infinite. Only chains with
95 atoms within this distance of a model ligand will
96 be considered in the chain mapping.
97 :type model_bs_radius: :class:`float`
98 :param binding_sites_topn: maximum number of model binding site
99 representations to assess per target binding
101 :type binding_sites_topn: :class:`int`
102 :param full_bs_search: If True, all potential binding sites in the model
103 are searched for each target binding site. If False,
104 the search space in the model is reduced to chains
105 around (`model_bs_radius` Å) model ligands.
106 This speeds up computations, but may result in
107 ligands not being scored if the predicted ligand
108 pose is too far from the actual binding site.
109 :type full_bs_search: :class:`bool`
110 :param min_pep_length: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`.
111 :type min_pep_length: :class:`int`
112 :param min_nuc_length: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`
113 :type min_nuc_length: :class:`int`
114 :param pep_seqid_thr: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`
115 :type pep_seqid_thr: :class:`float`
116 :param nuc_seqid_thr: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`
117 :type nuc_seqid_thr: :class:`float`
118 :param mdl_map_pep_seqid_thr: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`
119 :type mdl_map_pep_seqid_thr: :class:`float`
120 :param mdl_map_nuc_seqid_thr: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`
121 :type mdl_map_nuc_seqid_thr: :class:`float`
123 def __init__(self, model, target, model_ligands, target_ligands,
124 resnum_alignments=False, rename_ligand_chain=False,
125 substructure_match=False, coverage_delta=0.2,
126 max_symmetries=1e5, bs_radius=4.0, lddt_lp_radius=15.0,
127 model_bs_radius=25, binding_sites_topn=100000,
128 full_bs_search=False, min_pep_length = 6,
129 min_nuc_length = 4, pep_seqid_thr = 95.,
131 mdl_map_pep_seqid_thr = 0.,
132 mdl_map_nuc_seqid_thr = 0.):
134 super().
__init__(model, target, model_ligands, target_ligands,
135 resnum_alignments = resnum_alignments,
136 rename_ligand_chain = rename_ligand_chain,
137 substructure_match = substructure_match,
138 coverage_delta = coverage_delta,
139 max_symmetries = max_symmetries,
140 min_pep_length = min_pep_length,
141 min_nuc_length = min_nuc_length,
142 pep_seqid_thr = pep_seqid_thr,
143 nuc_seqid_thr = nuc_seqid_thr,
144 mdl_map_pep_seqid_thr = mdl_map_pep_seqid_thr,
145 mdl_map_nuc_seqid_thr = mdl_map_nuc_seqid_thr)
160 self.
_repr_repr = dict()
168 "No residues were in proximity of the "
170 self.
state_decodingstate_decoding[11] = (
"model_binding_site",
"Binding site was not"
171 " found in the model, i.e. the binding site"
172 " was not modeled or the model ligand was "
173 "positioned too far in combination with "
174 "full_bs_search=False.")
176 "Unknown error occured in SCRMSDScorer")
178 def _compute(self, symmetries, target_ligand, model_ligand):
179 """ Implements interface from parent
182 best_rmsd_result = {
"rmsd":
None,
184 "bs_ref_res": list(),
185 "bs_ref_res_mapped": list(),
186 "bs_mdl_res_mapped": list(),
188 "target_ligand": target_ligand,
189 "model_ligand": model_ligand,
190 "chain_mapping": dict(),
192 "inconsistent_residues": list()}
194 representations = self.
_get_repr_get_repr(target_ligand, model_ligand)
196 msg =
"Computing BiSyRMSD with %d chain mappings" % len(representations)
197 (LogWarning
if len(representations) > 10000
else LogInfo)(msg)
199 for r
in representations:
200 rmsd = _SCRMSD_symmetries(symmetries, model_ligand,
201 target_ligand, transformation=r.transform)
203 if best_rmsd_result[
"rmsd"]
is None or \
204 rmsd < best_rmsd_result[
"rmsd"]:
205 best_rmsd_result = {
"rmsd": rmsd,
207 "bs_ref_res": r.substructure.residues,
208 "bs_ref_res_mapped": r.ref_residues,
209 "bs_mdl_res_mapped": r.mdl_residues,
210 "bb_rmsd": r.bb_rmsd,
211 "target_ligand": target_ligand,
212 "model_ligand": model_ligand,
213 "chain_mapping": r.GetFlatChainMapping(),
214 "transform": r.transform,
215 "inconsistent_residues":
216 r.inconsistent_residues}
218 target_ligand_state = 0
219 model_ligand_state = 0
222 if best_rmsd_result[
"rmsd"]
is not None:
223 best_rmsd = best_rmsd_result[
"rmsd"]
231 target_ligand_state = 10
232 elif len(representations) == 0:
235 return (best_rmsd, pair_state, target_ligand_state, model_ligand_state,
238 def _score_dir(self):
239 """ Implements interface from parent
243 def _get_repr(self, target_ligand, model_ligand):
248 key = (target_ligand.handle.hash_code, 0)
250 key = (target_ligand.handle.hash_code,
251 model_ligand.handle.hash_code)
253 if key
not in self.
_repr_repr:
255 LogVerbose(
"%d chains are in proximity of the target ligand: %s" % (
256 ref_bs.chain_count,
", ".join([c.name
for c
in ref_bs.chains])))
265 inclusion_radius=radius,
267 chem_mapping_result=repr_in)
268 self.
_repr_repr[key] = reprs
270 return self.
_repr_repr[key]
272 def _get_target_binding_site(self, target_ligand):
274 if target_ligand.handle.hash_code
not in self.
_binding_sites_binding_sites:
277 ref_residues_hashes = set()
278 ignored_residue_hashes = {target_ligand.hash_code}
279 for ligand_at
in target_ligand.atoms:
280 close_atoms = self.
targettarget.FindWithin(ligand_at.GetPos(),
282 for close_at
in close_atoms:
284 ref_res = close_at.GetResidue()
285 h = ref_res.handle.GetHashCode()
286 if h
not in ref_residues_hashes
and \
287 h
not in ignored_residue_hashes:
288 with ligand_scoring_base._SinkVerbosityLevel(1):
289 view = self.
_chain_mapper_chain_mapper.target.ViewForHandle(ref_res)
291 h = ref_res.handle.GetHashCode()
292 ref_residues_hashes.add(h)
293 elif ref_res.is_ligand:
294 msg = f
"Ignoring ligand {ref_res.qualified_name} "
295 msg +=
"in binding site of "
296 msg += str(target_ligand.qualified_name)
298 ignored_residue_hashes.add(h)
299 elif ref_res.chem_type == mol.ChemType.WATERS:
302 msg = f
"Ignoring residue {ref_res.qualified_name} "
303 msg +=
"in binding site of "
304 msg += str(target_ligand.qualified_name)
306 ignored_residue_hashes.add(h)
308 ref_bs = self.
targettarget.CreateEmptyView()
309 if ref_residues_hashes:
313 for ch
in self.
targettarget.chains:
314 for r
in ch.residues:
315 if r.handle.GetHashCode()
in ref_residues_hashes:
316 ref_bs.AddResidue(r, mol.ViewAddFlag.INCLUDE_ALL)
317 if len(ref_bs.residues) == 0:
318 raise RuntimeError(
"Failed to add proximity residues to "
319 "the reference binding site entity")
321 self.
_binding_sites_binding_sites[target_ligand.handle.hash_code] = ref_bs
323 return self.
_binding_sites_binding_sites[target_ligand.handle.hash_code]
325 def _get_get_repr_input(self, mdl_ligand):
326 if mdl_ligand.handle.hash_code
not in self.
_get_repr_input_get_repr_input:
333 for at
in mdl_ligand.atoms:
334 with ligand_scoring_base._SinkVerbosityLevel(1):
337 for close_at
in close_atoms:
338 chains.add(close_at.GetChain().GetName())
341 LogVerbose(
"%d chains are in proximity of the model ligand: %s" % (
342 len(chains),
", ".join(chains)))
346 query +=
','.join([mol.QueryQuoteName(x)
for x
in chains])
350 chem_mapping = list()
352 chem_mapping.append([x
for x
in m
if x
in chains])
368 def SCRMSD(model_ligand, target_ligand, transformation=geom.Mat4(),
369 substructure_match=
False, max_symmetries=1e6):
370 """Calculate symmetry-corrected RMSD.
372 Binding site superposition must be computed separately and passed as
375 :param model_ligand: The model ligand
376 :type model_ligand: :class:`ost.mol.ResidueHandle` or
377 :class:`ost.mol.ResidueView`
378 :param target_ligand: The target ligand
379 :type target_ligand: :class:`ost.mol.ResidueHandle` or
380 :class:`ost.mol.ResidueView`
381 :param transformation: Optional transformation to apply on each atom
382 position of model_ligand.
383 :type transformation: :class:`ost.geom.Mat4`
384 :param substructure_match: Set this to True to allow partial target
386 :type substructure_match: :class:`bool`
387 :param max_symmetries: If more than that many isomorphisms exist, raise
388 a :class:`TooManySymmetriesError`. This can only be assessed by
389 generating at least that many isomorphisms and can take some time.
390 :type max_symmetries: :class:`int`
391 :rtype: :class:`float`
392 :raises: :class:`ost.mol.alg.ligand_scoring_base.NoSymmetryError` when no
393 symmetry can be found,
394 :class:`ost.mol.alg.ligand_scoring_base.DisconnectedGraphError`
395 when ligand graph is disconnected,
396 :class:`ost.mol.alg.ligand_scoring_base.TooManySymmetriesError`
397 when more than *max_symmetries* isomorphisms are found.
400 symmetries = ligand_scoring_base.ComputeSymmetries(model_ligand,
402 substructure_match=substructure_match,
404 max_symmetries=max_symmetries)
405 return _SCRMSD_symmetries(symmetries, model_ligand, target_ligand,
409 def _SCRMSD_symmetries(symmetries, model_ligand, target_ligand,
411 """Compute SCRMSD with pre-computed symmetries. Internal. """
414 mdl_ligand_pos = np.ones((model_ligand.GetAtomCount(), 4))
415 for a_idx, a
in enumerate(model_ligand.atoms):
417 mdl_ligand_pos[a_idx, 0] = p[0]
418 mdl_ligand_pos[a_idx, 1] = p[1]
419 mdl_ligand_pos[a_idx, 2] = p[2]
420 np_transformation = np.zeros((4,4))
423 np_transformation[i,j] = transformation[i,j]
424 mdl_ligand_pos = mdl_ligand_pos.dot(np_transformation.T)[:,:3]
427 trg_ligand_pos = np.zeros((target_ligand.GetAtomCount(), 3))
428 for a_idx, a
in enumerate(target_ligand.atoms):
430 trg_ligand_pos[a_idx, 0] = p[0]
431 trg_ligand_pos[a_idx, 1] = p[1]
432 trg_ligand_pos[a_idx, 2] = p[2]
439 rmsd_mdl_pos = np.zeros((target_ligand.GetAtomCount(), 3))
440 rmsd_trg_pos = np.zeros((target_ligand.GetAtomCount(), 3))
444 for i, (trg_sym, mdl_sym)
in enumerate(symmetries):
445 for idx, (mdl_anum, trg_anum)
in enumerate(zip(mdl_sym, trg_sym)):
446 rmsd_mdl_pos[idx,:] = mdl_ligand_pos[mdl_anum, :]
447 rmsd_trg_pos[idx,:] = trg_ligand_pos[trg_anum, :]
448 rmsd = np.sqrt(((rmsd_mdl_pos - rmsd_trg_pos)**2).sum(-1).mean())
455 __all__ = (
'SCRMSDScorer',
'SCRMSD')
def _unmapped_mdl_chains(self)
def _chem_group_alns(self)
def _chain_mapping_mdl(self)
def _get_target_binding_site(self, target_ligand)
def __init__(self, model, target, model_ligands, target_ligands, resnum_alignments=False, rename_ligand_chain=False, substructure_match=False, coverage_delta=0.2, max_symmetries=1e5, bs_radius=4.0, lddt_lp_radius=15.0, model_bs_radius=25, binding_sites_topn=100000, full_bs_search=False, min_pep_length=6, min_nuc_length=4, pep_seqid_thr=95., nuc_seqid_thr=95., mdl_map_pep_seqid_thr=0., mdl_map_nuc_seqid_thr=0.)
def _get_get_repr_input(self, mdl_ligand)
def _get_repr(self, target_ligand, model_ligand)
def SCRMSD(model_ligand, target_ligand, transformation=geom.Mat4(), substructure_match=False, max_symmetries=1e6)