OpenStructure
Loading...
Searching...
No Matches
ligand_scoring_lddtpli.py
Go to the documentation of this file.
1import numpy as np
2
3from ost import LogWarning, LogInfo
4from ost import geom
5from ost import mol
6from ost import seq
7
8from ost.mol.alg import lddt
9from ost.mol.alg import chain_mapping
10from ost.mol.alg import ligand_scoring_base
11
13 """ :class:`LigandScorer` implementing LDDT-PLI.
14
15 LDDT-PLI is an LDDT score considering contacts between ligand and
16 receptor. Where receptor consists of protein and nucleic acid chains that
17 pass the criteria for :class:`chain mapping <ost.mol.alg.chain_mapping>`.
18 This means ignoring other ligands, waters, short polymers as well as any
19 incorrectly connected chains that may be in proximity.
20
21 :class:`LDDTPLIScorer` computes a score for a specific pair of target/model
22 ligands. Given a target/model ligand pair, all possible mappings of
23 model chains onto their chemically equivalent target chains are enumerated.
24 For each of these enumerations, all possible symmetries, i.e. atom-atom
25 assignments of the ligand as given by :class:`LigandScorer`, are evaluated
26 and an LDDT-PLI score is computed. The best possible LDDT-PLI score is
27 returned.
28
29 The LDDT-PLI score is a variant of LDDT with a custom inclusion radius
30 (`lddt_pli_radius`), no stereochemistry checks, and which penalizes
31 contacts added in the model within `lddt_pli_radius` by default
32 (can be changed with the `add_mdl_contacts` flag) but only if the involved
33 atoms can be mapped to the target. This is a requirement to
34 1) extract the respective reference distance from the target
35 2) avoid usage of contacts for which we have no experimental evidence.
36 One special case are contacts from chains that are not mapped to the target
37 binding site. It is very well possible that we have experimental evidence
38 for this chain though its just too far away from the target binding site.
39 We therefore try to map these contacts to the chain in the target with
40 equivalent sequence that is closest to the target binding site. If the
41 respective atoms can be mapped there, the contact is considered not
42 fulfilled and added as penalty.
43
44 Populates :attr:`LigandScorer.aux_data` with following :class:`dict` keys:
45
46 * lddt_pli: The LDDT-PLI score
47 * lddt_pli_n_contacts: Number of contacts considered in LDDT computation
48 * target_ligand: The actual target ligand for which the score was computed
49 * model_ligand: The actual model ligand for which the score was computed
50 * chain_mapping: :class:`dict` with a chain mapping of chains involved in
51 binding site - key: trg chain name, value: mdl chain name
52
53 :param model: Passed to parent constructor - see :class:`LigandScorer`.
54 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
55 :param target: Passed to parent constructor - see :class:`LigandScorer`.
56 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
57 :param model_ligands: Passed to parent constructor - see
58 :class:`LigandScorer`.
59 :type model_ligands: :class:`list`
60 :param target_ligands: Passed to parent constructor - see
61 :class:`LigandScorer`.
62 :type target_ligands: :class:`list`
63 :param resnum_alignments: Passed to parent constructor - see
64 :class:`LigandScorer`.
65 :type resnum_alignments: :class:`bool`
66 :param rename_ligand_chain: Passed to parent constructor - see
67 :class:`LigandScorer`.
68 :type rename_ligand_chain: :class:`bool`
69 :param substructure_match: Passed to parent constructor - see
70 :class:`LigandScorer`.
71 :type substructure_match: :class:`bool`
72 :param coverage_delta: Passed to parent constructor - see
73 :class:`LigandScorer`.
74 :type coverage_delta: :class:`float`
75 :param max_symmetries: Passed to parent constructor - see
76 :class:`LigandScorer`.
77 :type max_symmetries: :class:`int`
78 :param lddt_pli_radius: LDDT inclusion radius for LDDT-PLI.
79 :type lddt_pli_radius: :class:`float`
80 :param add_mdl_contacts: Whether to penalize added model contacts.
81 :type add_mdl_contacts: :class:`bool`
82 :param lddt_pli_thresholds: Distance difference thresholds for LDDT.
83 :type lddt_pli_thresholds: :class:`list` of :class:`float`
84 :param lddt_pli_binding_site_radius: Pro param - dont use. Providing a value
85 Restores behaviour from previous
86 implementation that first extracted a
87 binding site with strict distance
88 threshold and computed LDDT-PLI only on
89 those target residues whereas the
90 current implementation includes every
91 atom within *lddt_pli_radius*.
92 :type lddt_pli_binding_site_radius: :class:`float`
93 :param min_pep_length: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`.
94 :type min_pep_length: :class:`int`
95 :param min_nuc_length: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`
96 :type min_nuc_length: :class:`int`
97 :param pep_seqid_thr: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`
98 :type pep_seqid_thr: :class:`float`
99 :param nuc_seqid_thr: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`
100 :type nuc_seqid_thr: :class:`float`
101 :param mdl_map_pep_seqid_thr: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`
102 :type mdl_map_pep_seqid_thr: :class:`float`
103 :param mdl_map_nuc_seqid_thr: See :class:`ost.mol.alg.ligand_scoring_base.LigandScorer`
104 :type mdl_map_nuc_seqid_thr: :class:`float`
105 """
106
107 def __init__(self, model, target, model_ligands, target_ligands,
108 resnum_alignments=False, rename_ligand_chain=False,
109 substructure_match=False, coverage_delta=0.2,
110 max_symmetries=1e4, lddt_pli_radius=6.0,
111 add_mdl_contacts=True,
112 lddt_pli_thresholds = [0.5, 1.0, 2.0, 4.0],
113 lddt_pli_binding_site_radius=None,
114 min_pep_length = 6,
115 min_nuc_length = 4, pep_seqid_thr = 95.,
116 nuc_seqid_thr = 95.,
117 mdl_map_pep_seqid_thr = 0.,
118 mdl_map_nuc_seqid_thr = 0.,
119 seqres=None,
120 trg_seqres_mapping=None):
121
122 super().__init__(model, target, model_ligands, target_ligands,
123 resnum_alignments = resnum_alignments,
124 rename_ligand_chain = rename_ligand_chain,
125 substructure_match = substructure_match,
126 coverage_delta = coverage_delta,
127 max_symmetries = max_symmetries,
128 min_pep_length = min_pep_length,
129 min_nuc_length = min_nuc_length,
130 pep_seqid_thr = pep_seqid_thr,
131 nuc_seqid_thr = nuc_seqid_thr,
132 mdl_map_pep_seqid_thr = mdl_map_pep_seqid_thr,
133 mdl_map_nuc_seqid_thr = mdl_map_nuc_seqid_thr,
134 seqres = seqres,
135 trg_seqres_mapping = trg_seqres_mapping)
136
137 self.lddt_pli_radius = lddt_pli_radius
138 self.add_mdl_contacts = add_mdl_contacts
139 self.lddt_pli_thresholds = lddt_pli_thresholds
140 self.lddt_pli_binding_site_radius = lddt_pli_binding_site_radius
141
142 # lazily precomputed variables to speedup lddt-pli computation
145 self.__mappable_atoms = None
146
147 # update state decoding from parent with subclass specific stuff
148 self.state_decoding[10] = ("no_contact",
149 "There were no LDDT contacts between the "
150 "binding site and the ligand, and LDDT-PLI "
151 "is undefined.")
152 self.state_decoding[20] = ("unknown",
153 "Unknown error occured in LDDTPLIScorer")
154
155 def _compute(self, symmetries, target_ligand, model_ligand):
156 """ Implements interface from parent
157 """
158 if self.add_mdl_contacts:
159 LogInfo("Computing LDDT-PLI with added model contacts")
160 result = self._compute_lddt_pli_add_mdl_contacts(symmetries,
161 target_ligand,
162 model_ligand)
163 else:
164 LogInfo("Computing LDDT-PLI without added model contacts")
165 result = self._compute_lddt_pli_classic(symmetries,
166 target_ligand,
167 model_ligand)
168
169 pair_state = 0
170 score = result["lddt_pli"]
171
172 if score is None or np.isnan(score):
173 if result["lddt_pli_n_contacts"] == 0:
174 # it's a space ship!
175 pair_state = 10
176 else:
177 # unknwon error state
178 pair_state = 20
179
180 # the ligands get a zero-state...
181 target_ligand_state = 0
182 model_ligand_state = 0
183
184 return (score, pair_state, target_ligand_state, model_ligand_state,
185 result)
186
187 def _score_dir(self):
188 """ Implements interface from parent
189 """
190 return '+'
191
192 def _compute_lddt_pli_add_mdl_contacts(self, symmetries, target_ligand,
193 model_ligand):
194
195
198
199 trg_residues, trg_bs, trg_chains, trg_ligand_chain, \
200 trg_ligand_res, scorer, chem_groups = \
201 self._lddt_pli_get_trg_data(target_ligand)
202
203 trg_bs_center = trg_bs.geometric_center
204
205 # Copy to make sure that we don't change anything on underlying
206 # references
207 # This is not strictly necessary in the current implementation but
208 # hey, maybe it avoids hard to debug errors when someone changes things
209 ref_indices = [a.copy() for a in scorer.ref_indices_ic]
210 ref_distances = [a.copy() for a in scorer.ref_distances_ic]
211
212 # distance hacking... remove any interchain distance except the ones
213 # with the ligand
214 ligand_start_idx = scorer.chain_start_indices[-1]
215 for at_idx in range(ligand_start_idx):
216 mask = ref_indices[at_idx] >= ligand_start_idx
217 ref_indices[at_idx] = ref_indices[at_idx][mask]
218 ref_distances[at_idx] = ref_distances[at_idx][mask]
219
220 mdl_residues, mdl_bs, mdl_chains, mdl_ligand_chain, mdl_ligand_res, \
221 chem_mapping = self._lddt_pli_get_mdl_data(model_ligand)
222
223
226
227 # ref_mdl_alns refers to full chain mapper trg and mdl structures
228 # => need to adapt mdl sequence that only contain residues in contact
229 # with ligand
230 cut_ref_mdl_alns = self._lddt_pli_cut_ref_mdl_alns(chem_groups,
231 chem_mapping,
232 mdl_bs, trg_bs)
233
234
237
238 # get each chain mapping that we ever observe in scoring
239 chain_mappings = list(chain_mapping._ChainMappings(chem_groups,
240 chem_mapping))
241
242 # for each mdl ligand atom, we collect all trg ligand atoms that are
243 # ever mapped onto it given *symmetries*
244 ligand_atom_mappings = [set() for a in mdl_ligand_res.atoms]
245 for (trg_sym, mdl_sym) in symmetries:
246 for trg_i, mdl_i in zip(trg_sym, mdl_sym):
247 ligand_atom_mappings[mdl_i].add(trg_i)
248
249 mdl_ligand_pos = np.zeros((mdl_ligand_res.GetAtomCount(), 3))
250 for a_idx, a in enumerate(mdl_ligand_res.atoms):
251 p = a.GetPos()
252 mdl_ligand_pos[a_idx, 0] = p[0]
253 mdl_ligand_pos[a_idx, 1] = p[1]
254 mdl_ligand_pos[a_idx, 2] = p[2]
255
256 trg_ligand_pos = np.zeros((trg_ligand_res.GetAtomCount(), 3))
257 for a_idx, a in enumerate(trg_ligand_res.atoms):
258 p = a.GetPos()
259 trg_ligand_pos[a_idx, 0] = p[0]
260 trg_ligand_pos[a_idx, 1] = p[1]
261 trg_ligand_pos[a_idx, 2] = p[2]
262
263 mdl_lig_hashes = [a.hash_code for a in mdl_ligand_res.atoms]
264
265 symmetric_atoms = np.asarray(sorted(list(scorer.symmetric_atoms)),
266 dtype=np.int64)
267
268 # two caches to cache things for each chain mapping => lists
269 # of len(chain_mappings)
270 #
271 # In principle we're caching for each trg/mdl ligand atom pair all
272 # information to update ref_indices/ref_distances and resolving the
273 # symmetries of the binding site.
274 # in detail: each list entry in *scoring_cache* is a dict with
275 # key: (mdl_lig_at_idx, trg_lig_at_idx)
276 # value: tuple with 4 elements - 1: indices of atoms representing added
277 # contacts relative to overall inexing scheme in scorer 2: the
278 # respective distances 3: the same but only containing indices towards
279 # atoms of the binding site that are considered symmetric 4: the
280 # respective indices.
281 # each list entry in *penalty_cache* is a list of len N mdl lig atoms.
282 # For each mdl lig at it contains a penalty for this mdl lig at. That
283 # means the number of contacts in the mdl binding site that can
284 # directly be mapped to the target given the local chain mapping but
285 # are not present in the target binding site, i.e. interacting atoms are
286 # too far away.
287 scoring_cache = list()
288 penalty_cache = list()
289
290 for mapping in chain_mappings:
291
292 # flat mapping with mdl chain names as key
293 flat_mapping = dict()
294 for trg_chem_group, mdl_chem_group in zip(chem_groups, mapping):
295 for a,b in zip(trg_chem_group, mdl_chem_group):
296 if a is not None and b is not None:
297 flat_mapping[b] = a
298
299 # for each mdl bs atom (as atom hash), the trg bs atoms (as index in
300 # scorer)
301 bs_atom_mapping = dict()
302 for mdl_cname, ref_cname in flat_mapping.items():
303 aln = cut_ref_mdl_alns[(ref_cname, mdl_cname)]
304 ref_ch = trg_bs.Select(f"cname={mol.QueryQuoteName(ref_cname)}")
305 mdl_ch = mdl_bs.Select(f"cname={mol.QueryQuoteName(mdl_cname)}")
306 aln.AttachView(0, ref_ch)
307 aln.AttachView(1, mdl_ch)
308 for col in aln:
309 ref_r = col.GetResidue(0)
310 mdl_r = col.GetResidue(1)
311 if ref_r.IsValid() and mdl_r.IsValid():
312 for mdl_a in mdl_r.atoms:
313 ref_a = ref_r.FindAtom(mdl_a.GetName())
314 if ref_a.IsValid():
315 ref_h = ref_a.handle.hash_code
316 if ref_h in scorer.atom_indices:
317 mdl_h = mdl_a.handle.hash_code
318 bs_atom_mapping[mdl_h] = \
319 scorer.atom_indices[ref_h]
320
321 cache = dict()
322 n_penalties = list()
323
324 for mdl_a_idx, mdl_a in enumerate(mdl_ligand_res.atoms):
325 n_penalty = 0
326 trg_bs_indices = list()
327 close_a = mdl_bs.FindWithin(mdl_a.GetPos(),
328 self.lddt_pli_radius)
329 for a in close_a:
330 mdl_a_hash_code = a.hash_code
331 if mdl_a_hash_code in bs_atom_mapping:
332 trg_bs_indices.append(bs_atom_mapping[mdl_a_hash_code])
333 elif mdl_a_hash_code not in mdl_lig_hashes:
334 if a.GetChain().GetName() in flat_mapping:
335 # Its in a mapped chain
336 at_key = (a.GetResidue().GetNumber(), a.name)
337 cname = a.GetChain().name
338 cname_key = (flat_mapping[cname], cname)
339 if at_key in self._mappable_atoms[cname_key]:
340 # Its a contact in the model but not part of
341 # trg_bs. It can still be mapped using the
342 # global mdl_ch/ref_ch alignment
343 # d in ref > self.lddt_pli_radius + max_thresh
344 # => guaranteed to be non-fulfilled contact
345 n_penalty += 1
346
347 n_penalties.append(n_penalty)
348
349 trg_bs_indices = np.asarray(sorted(trg_bs_indices))
350
351 for trg_a_idx in ligand_atom_mappings[mdl_a_idx]:
352 # mask selects entries in trg_bs_indices that are not yet
353 # part of classic LDDT ref_indices for atom at trg_a_idx
354 # => added mdl contacts
355 mask = np.isin(trg_bs_indices,
356 ref_indices[ligand_start_idx + trg_a_idx],
357 assume_unique=True, invert=True)
358 added_indices = np.asarray([], dtype=np.int64)
359 added_distances = np.asarray([], dtype=np.float64)
360 if np.sum(mask) > 0:
361 # compute ref distances on reference positions
362 added_indices = trg_bs_indices[mask]
363 tmp = scorer.positions.take(added_indices, axis=0)
364 np.subtract(tmp, trg_ligand_pos[trg_a_idx][None, :],
365 out=tmp)
366 np.square(tmp, out=tmp)
367 tmp = tmp.sum(axis=1)
368 np.sqrt(tmp, out=tmp)
369 added_distances = tmp
370
371 # extract the distances towards bs atoms that are symmetric
372 sym_mask = np.isin(added_indices, symmetric_atoms,
373 assume_unique=True)
374
375 cache[(mdl_a_idx, trg_a_idx)] = (added_indices,
376 added_distances,
377 added_indices[sym_mask],
378 added_distances[sym_mask])
379
380 scoring_cache.append(cache)
381 penalty_cache.append(n_penalties)
382
383 # cache for model contacts towards non mapped trg chains - this is
384 # relevant for self._lddt_pli_unmapped_chain_penalty
385 # key: tuple in form (trg_ch, mdl_ch)
386 # value: yet another dict with
387 # key: ligand_atom_hash
388 # value: n contacts towards respective trg chain that can be mapped
389 non_mapped_cache = dict()
390
391
394
395 best_score = -1.0
396 best_result = {"lddt_pli": None,
397 "lddt_pli_n_contacts": 0,
398 "chain_mapping": None}
399
400 # dummy alignment for ligand chains which is needed as input later on
401 ligand_aln = seq.CreateAlignment()
402 trg_s = seq.CreateSequence(trg_ligand_chain.name,
403 trg_ligand_res.GetOneLetterCode())
404 mdl_s = seq.CreateSequence(mdl_ligand_chain.name,
405 mdl_ligand_res.GetOneLetterCode())
406 ligand_aln.AddSequence(trg_s)
407 ligand_aln.AddSequence(mdl_s)
408 ligand_at_indices = list(range(ligand_start_idx, scorer.n_atoms))
409
410 sym_idx_collector = [None] * scorer.n_atoms
411 sym_dist_collector = [None] * scorer.n_atoms
412
413 for mapping, s_cache, p_cache in zip(chain_mappings, scoring_cache,
414 penalty_cache):
415
416 lddt_chain_mapping = dict()
417 lddt_alns = dict()
418 for ref_chem_group, mdl_chem_group in zip(chem_groups, mapping):
419 for ref_ch, mdl_ch in zip(ref_chem_group, mdl_chem_group):
420 # some mdl chains can be None
421 if mdl_ch is not None:
422 lddt_chain_mapping[mdl_ch] = ref_ch
423 lddt_alns[mdl_ch] = cut_ref_mdl_alns[(ref_ch, mdl_ch)]
424
425 # add ligand to lddt_chain_mapping/lddt_alns
426 lddt_chain_mapping[mdl_ligand_chain.name] = trg_ligand_chain.name
427 lddt_alns[mdl_ligand_chain.name] = ligand_aln
428
429 # already process model, positions will be manually hacked for each
430 # symmetry - small overhead for variables that are thrown away here
431 pos, _, _, _, _, _, lddt_symmetries = \
432 scorer._ProcessModel(mdl_bs, lddt_chain_mapping,
433 residue_mapping = lddt_alns,
434 nirvana_dist = self.lddt_pli_radius + max(self.lddt_pli_thresholds),
435 check_resnames = False)
436
437 # estimate a penalty for unsatisfied model contacts from chains
438 # that are not in the local trg binding site, but can be mapped in
439 # the target.
440 # We're using the trg chain with the closest geometric center to
441 # the trg binding site that can be mapped to the mdl chain
442 # according the chem mapping. An alternative would be to search for
443 # the target chain with the minimal number of additional contacts.
444 # There is not good solution for this problem...
445 unmapped_chains = list()
446 already_mapped = set()
447 for mdl_ch in mdl_chains:
448 if mdl_ch not in lddt_chain_mapping:
449
450 if mdl_ch in self._mdl_chains_without_chem_mapping:
451 # this mdl chain does not map to any trg chain
452 continue
453
454 # check which chain in trg is closest
455 chem_grp_idx = None
456 for i, m in enumerate(self._chem_mapping_chem_mapping_chem_mapping):
457 if mdl_ch in m:
458 chem_grp_idx = i
459 break
460 if chem_grp_idx is None:
461 raise RuntimeError("This should never happen... "
462 "ask Gabriel...")
463 closest_ch = None
464 closest_dist = None
465 for trg_ch in self._chain_mapper.chem_groups[chem_grp_idx]:
466 if trg_ch not in lddt_chain_mapping.values():
467 if trg_ch not in already_mapped:
468 ch = self._chain_mapper.target.FindChain(trg_ch)
469 c = ch.geometric_center
470 d = geom.Distance(trg_bs_center, c)
471 if closest_dist is None or d < closest_dist:
472 closest_dist = d
473 closest_ch = trg_ch
474 if closest_ch is not None:
475 unmapped_chains.append((closest_ch, mdl_ch))
476 already_mapped.add(closest_ch)
477
478 for (trg_sym, mdl_sym) in symmetries:
479
480 # update positions
481 for mdl_i, trg_i in zip(mdl_sym, trg_sym):
482 pos[ligand_start_idx + trg_i, :] = mdl_ligand_pos[mdl_i, :]
483
484 # start new ref_indices/ref_distances from original values
485 funky_ref_indices = [np.copy(a) for a in ref_indices]
486 funky_ref_distances = [np.copy(a) for a in ref_distances]
487
488 # The only distances from the binding site towards the ligand
489 # we care about are the ones from the symmetric atoms to
490 # correctly compute scorer._ResolveSymmetries.
491 # We collect them while updating distances from added mdl
492 # contacts
493 for idx in symmetric_atoms:
494 sym_idx_collector[idx] = list()
495 sym_dist_collector[idx] = list()
496
497 # add data from added mdl contacts cache
498 added_penalty = 0
499 for mdl_i, trg_i in zip(mdl_sym, trg_sym):
500 added_penalty += p_cache[mdl_i]
501 cache = s_cache[mdl_i, trg_i]
502 full_trg_i = ligand_start_idx + trg_i
503 funky_ref_indices[full_trg_i] = \
504 np.append(funky_ref_indices[full_trg_i], cache[0])
505 funky_ref_distances[full_trg_i] = \
506 np.append(funky_ref_distances[full_trg_i], cache[1])
507 for idx, d in zip(cache[2], cache[3]):
508 sym_idx_collector[idx].append(full_trg_i)
509 sym_dist_collector[idx].append(d)
510
511 for idx in symmetric_atoms:
512 funky_ref_indices[idx] = \
513 np.append(funky_ref_indices[idx],
514 np.asarray(sym_idx_collector[idx],
515 dtype=np.int64))
516 funky_ref_distances[idx] = \
517 np.append(funky_ref_distances[idx],
518 np.asarray(sym_dist_collector[idx],
519 dtype=np.float64))
520
521 # we can pass funky_ref_indices/funky_ref_distances as
522 # sym_ref_indices/sym_ref_distances in
523 # scorer._ResolveSymmetries as we only have distances of the bs
524 # to the ligand and ligand atoms are "non-symmetric"
525 scorer._ResolveSymmetries(pos, self.lddt_pli_thresholds,
526 lddt_symmetries,
527 funky_ref_indices,
528 funky_ref_distances)
529
530 N = sum([len(funky_ref_indices[i]) for i in ligand_at_indices])
531 N += added_penalty
532
533 # collect number of expected contacts which can be mapped
534 if len(unmapped_chains) > 0:
535 N += self._lddt_pli_unmapped_chain_penalty(unmapped_chains,
536 non_mapped_cache,
537 mdl_bs,
538 mdl_ligand_res,
539 mdl_sym)
540
541 conserved = np.sum(scorer._EvalAtoms(pos, ligand_at_indices,
543 funky_ref_indices,
544 funky_ref_distances),
545 axis=0)
546 score = None
547 if N > 0:
548 score = np.mean(conserved/N)
549
550 if score is not None and score > best_score:
551 best_score = score
552 save_chain_mapping = dict(lddt_chain_mapping)
553 del save_chain_mapping[mdl_ligand_chain.name]
554 best_result = {"lddt_pli": score,
555 "lddt_pli_n_contacts": N,
556 "chain_mapping": save_chain_mapping}
557
558 # fill misc info to result object
559 best_result["target_ligand"] = target_ligand
560 best_result["model_ligand"] = model_ligand
561
562 return best_result
563
564
565 def _compute_lddt_pli_classic(self, symmetries, target_ligand,
566 model_ligand):
567
568
571
572 max_r = None
575
576 trg_residues, trg_bs, trg_chains, trg_ligand_chain, \
577 trg_ligand_res, scorer, chem_groups = \
578 self._lddt_pli_get_trg_data(target_ligand, max_r = max_r)
579
580 # Copy to make sure that we don't change anything on underlying
581 # references
582 # This is not strictly necessary in the current implementation but
583 # hey, maybe it avoids hard to debug errors when someone changes things
584 ref_indices = [a.copy() for a in scorer.ref_indices_ic]
585 ref_distances = [a.copy() for a in scorer.ref_distances_ic]
586
587 # no matter what mapping/symmetries, the number of expected
588 # contacts stays the same
589 ligand_start_idx = scorer.chain_start_indices[-1]
590 ligand_at_indices = list(range(ligand_start_idx, scorer.n_atoms))
591 n_exp = sum([len(ref_indices[i]) for i in ligand_at_indices])
592
593 mdl_residues, mdl_bs, mdl_chains, mdl_ligand_chain, mdl_ligand_res, \
594 chem_mapping = self._lddt_pli_get_mdl_data(model_ligand)
595
596 if n_exp == 0:
597 # no contacts... nothing to compute...
598 return {"lddt_pli": None,
599 "lddt_pli_n_contacts": 0,
600 "chain_mapping": None,
601 "target_ligand": target_ligand,
602 "model_ligand": model_ligand}
603
604 # Distance hacking... remove any interchain distance except the ones
605 # with the ligand
606 for at_idx in range(ligand_start_idx):
607 mask = ref_indices[at_idx] >= ligand_start_idx
608 ref_indices[at_idx] = ref_indices[at_idx][mask]
609 ref_distances[at_idx] = ref_distances[at_idx][mask]
610
611
614
615 # ref_mdl_alns refers to full chain mapper trg and mdl structures
616 # => need to adapt mdl sequence that only contain residues in contact
617 # with ligand
618 cut_ref_mdl_alns = self._lddt_pli_cut_ref_mdl_alns(chem_groups,
619 chem_mapping,
620 mdl_bs, trg_bs)
621
622
625
626 best_score = -1.0
627
628 # dummy alignment for ligand chains which is needed as input later on
629 l_aln = seq.CreateAlignment()
630 l_aln.AddSequence(seq.CreateSequence(trg_ligand_chain.name,
631 trg_ligand_res.GetOneLetterCode()))
632 l_aln.AddSequence(seq.CreateSequence(mdl_ligand_chain.name,
633 mdl_ligand_res.GetOneLetterCode()))
634
635 mdl_ligand_pos = np.zeros((model_ligand.GetAtomCount(), 3))
636 for a_idx, a in enumerate(model_ligand.atoms):
637 p = a.GetPos()
638 mdl_ligand_pos[a_idx, 0] = p[0]
639 mdl_ligand_pos[a_idx, 1] = p[1]
640 mdl_ligand_pos[a_idx, 2] = p[2]
641
642 for mapping in chain_mapping._ChainMappings(chem_groups, chem_mapping):
643
644 lddt_chain_mapping = dict()
645 lddt_alns = dict()
646 for ref_chem_group, mdl_chem_group in zip(chem_groups, mapping):
647 for ref_ch, mdl_ch in zip(ref_chem_group, mdl_chem_group):
648 # some mdl chains can be None
649 if mdl_ch is not None:
650 lddt_chain_mapping[mdl_ch] = ref_ch
651 lddt_alns[mdl_ch] = cut_ref_mdl_alns[(ref_ch, mdl_ch)]
652
653 # add ligand to lddt_chain_mapping/lddt_alns
654 lddt_chain_mapping[mdl_ligand_chain.name] = trg_ligand_chain.name
655 lddt_alns[mdl_ligand_chain.name] = l_aln
656
657 # already process model, positions will be manually hacked for each
658 # symmetry - small overhead for variables that are thrown away here
659 pos, _, _, _, _, _, lddt_symmetries = \
660 scorer._ProcessModel(mdl_bs, lddt_chain_mapping,
661 residue_mapping = lddt_alns,
662 nirvana_dist = self.lddt_pli_radius + max(self.lddt_pli_thresholds),
663 check_resnames = False)
664
665 for (trg_sym, mdl_sym) in symmetries:
666 for mdl_i, trg_i in zip(mdl_sym, trg_sym):
667 pos[ligand_start_idx + trg_i, :] = mdl_ligand_pos[mdl_i, :]
668 # we can pass ref_indices/ref_distances as
669 # sym_ref_indices/sym_ref_distances in
670 # scorer._ResolveSymmetries as we only have distances of the bs
671 # to the ligand and ligand atoms are "non-symmetric"
672 scorer._ResolveSymmetries(pos, self.lddt_pli_thresholds,
673 lddt_symmetries,
674 ref_indices,
675 ref_distances)
676 # compute number of conserved distances for ligand atoms
677 conserved = np.sum(scorer._EvalAtoms(pos, ligand_at_indices,
679 ref_indices,
680 ref_distances), axis=0)
681 score = np.mean(conserved/n_exp)
682
683 if score > best_score:
684 best_score = score
685 save_chain_mapping = dict(lddt_chain_mapping)
686 del save_chain_mapping[mdl_ligand_chain.name]
687 best_result = {"lddt_pli": score,
688 "chain_mapping": save_chain_mapping}
689
690 # fill misc info to result object
691 best_result["lddt_pli_n_contacts"] = n_exp
692 best_result["target_ligand"] = target_ligand
693 best_result["model_ligand"] = model_ligand
694
695 return best_result
696
697 def _lddt_pli_unmapped_chain_penalty(self, unmapped_chains,
698 non_mapped_cache,
699 mdl_bs,
700 mdl_ligand_res,
701 mdl_sym):
702
703 n_exp = 0
704 for ch_tuple in unmapped_chains:
705 if ch_tuple not in non_mapped_cache:
706 # for each ligand atom, we count the number of mappable atoms
707 # within lddt_pli_radius
708 counts = dict()
709 # the select statement also excludes the ligand in mdl_bs
710 # as it resides in a separate chain
711 mdl_cname = ch_tuple[1]
712 query = "cname=" + mol.QueryQuoteName(mdl_cname)
713 mdl_bs_ch = mdl_bs.Select(query)
714 for a in mdl_ligand_res.atoms:
715 close_atoms = \
716 mdl_bs_ch.FindWithin(a.GetPos(), self.lddt_pli_radius)
717 N = 0
718 for close_a in close_atoms:
719 at_key = (close_a.GetResidue().GetNumber(),
720 close_a.GetName())
721 if at_key in self._mappable_atoms[ch_tuple]:
722 N += 1
723 counts[a.hash_code] = N
724
725 # fill cache
726 non_mapped_cache[ch_tuple] = counts
727
728 # add number of mdl contacts which can be mapped to target
729 # as non-fulfilled contacts
730 counts = non_mapped_cache[ch_tuple]
731 lig_hash_codes = [a.hash_code for a in mdl_ligand_res.atoms]
732 for i in mdl_sym:
733 n_exp += counts[lig_hash_codes[i]]
734
735 return n_exp
736
737
738 def _lddt_pli_get_mdl_data(self, model_ligand):
739 if model_ligand not in self._lddt_pli_model_data:
740
741 mdl = self._chain_mapping_mdl
742
743 mdl_residues = set()
744 for at in model_ligand.atoms:
745 close_atoms = mdl.FindWithin(at.GetPos(), self.lddt_pli_radius)
746 for close_at in close_atoms:
747 mdl_residues.add(close_at.GetResidue())
748
749 max_r = self.lddt_pli_radius + max(self.lddt_pli_thresholds)
750 for r in mdl.residues:
751 r.SetIntProp("bs", 0)
752 for at in model_ligand.atoms:
753 close_atoms = mdl.FindWithin(at.GetPos(), max_r)
754 for close_at in close_atoms:
755 close_at.GetResidue().SetIntProp("bs", 1)
756
757 mdl_bs = mol.CreateEntityFromView(mdl.Select("grbs:0=1"), True)
758 mdl_chains = set([ch.name for ch in mdl_bs.chains])
759
760 mdl_editor = mdl_bs.EditXCS(mol.BUFFERED_EDIT)
761 mdl_ligand_chain = None
762 for cname in ["hugo_the_cat_terminator", "ida_the_cheese_monster"]:
763 try:
764 # I'm pretty sure, one of these chain names is not there...
765 mdl_ligand_chain = mdl_editor.InsertChain(cname)
766 break
767 except:
768 pass
769 if mdl_ligand_chain is None:
770 raise RuntimeError("Fuck this, I'm out...")
771 mdl_ligand_res = mdl_editor.AppendResidue(mdl_ligand_chain,
772 model_ligand,
773 deep=True)
774 mdl_editor.RenameResidue(mdl_ligand_res, "LIG")
775 mdl_editor.SetResidueNumber(mdl_ligand_res, mol.ResNum(1))
776
777 chem_mapping = list()
779 chem_mapping.append([x for x in m if x in mdl_chains])
780
781 self._lddt_pli_model_data[model_ligand] = (mdl_residues,
782 mdl_bs,
783 mdl_chains,
784 mdl_ligand_chain,
785 mdl_ligand_res,
786 chem_mapping)
787
788 return self._lddt_pli_model_data[model_ligand]
789
790
791 def _lddt_pli_get_trg_data(self, target_ligand, max_r = None):
792 if target_ligand not in self._lddt_pli_target_data:
793
794 trg = self._chain_mapper.target
795
796 if max_r is None:
797 max_r = self.lddt_pli_radius + max(self.lddt_pli_thresholds)
798
799 trg_residues = set()
800 for at in target_ligand.atoms:
801 close_atoms = trg.FindWithin(at.GetPos(), max_r)
802 for close_at in close_atoms:
803 trg_residues.add(close_at.GetResidue())
804
805 for r in trg.residues:
806 r.SetIntProp("bs", 0)
807
808 for r in trg_residues:
809 r.SetIntProp("bs", 1)
810
811 trg_bs = mol.CreateEntityFromView(trg.Select("grbs:0=1"), True)
812 trg_chains = set([ch.name for ch in trg_bs.chains])
813
814 trg_editor = trg_bs.EditXCS(mol.BUFFERED_EDIT)
815 trg_ligand_chain = None
816 for cname in ["hugo_the_cat_terminator", "ida_the_cheese_monster"]:
817 try:
818 # I'm pretty sure, one of these chain names is not there yet
819 trg_ligand_chain = trg_editor.InsertChain(cname)
820 break
821 except:
822 pass
823 if trg_ligand_chain is None:
824 raise RuntimeError("Fuck this, I'm out...")
825
826 trg_ligand_res = trg_editor.AppendResidue(trg_ligand_chain,
827 target_ligand,
828 deep=True)
829 trg_editor.RenameResidue(trg_ligand_res, "LIG")
830 trg_editor.SetResidueNumber(trg_ligand_res, mol.ResNum(1))
831
832 compound_name = trg_ligand_res.name
833 compound = lddt.CustomCompound.FromResidue(trg_ligand_res)
834 custom_compounds = {compound_name: compound}
835
836 scorer = lddt.lDDTScorer(trg_bs,
837 custom_compounds = custom_compounds,
838 inclusion_radius = self.lddt_pli_radius)
839
840 chem_groups = list()
841 for g in self._chain_mapper.chem_groups:
842 chem_groups.append([x for x in g if x in trg_chains])
843
844 self._lddt_pli_target_data[target_ligand] = (trg_residues,
845 trg_bs,
846 trg_chains,
847 trg_ligand_chain,
848 trg_ligand_res,
849 scorer,
850 chem_groups)
851
852 return self._lddt_pli_target_data[target_ligand]
853
854
855 def _lddt_pli_cut_ref_mdl_alns(self, chem_groups, chem_mapping, mdl_bs,
856 ref_bs):
857 cut_ref_mdl_alns = dict()
858 for ref_chem_group, mdl_chem_group in zip(chem_groups, chem_mapping):
859 for ref_ch in ref_chem_group:
860
861 ref_bs_chain = ref_bs.FindChain(ref_ch)
862 query = "cname=" + mol.QueryQuoteName(ref_ch)
863 ref_view = self._chain_mapper.target.Select(query)
864
865 for mdl_ch in mdl_chem_group:
866 aln = self._ref_mdl_alns[(ref_ch, mdl_ch)]
867
868 aln.AttachView(0, ref_view)
869
870 mdl_bs_chain = mdl_bs.FindChain(mdl_ch)
871 query = "cname=" + mol.QueryQuoteName(mdl_ch)
872 aln.AttachView(1, self._chain_mapping_mdl.Select(query))
873
874 cut_mdl_seq = ['-'] * aln.GetLength()
875 cut_ref_seq = ['-'] * aln.GetLength()
876 for i, col in enumerate(aln):
877
878 # check ref residue
879 r = col.GetResidue(0)
880 if r.IsValid():
881 bs_r = ref_bs_chain.FindResidue(r.GetNumber())
882 if bs_r.IsValid():
883 cut_ref_seq[i] = col[0]
884
885 # check mdl residue
886 r = col.GetResidue(1)
887 if r.IsValid():
888 bs_r = mdl_bs_chain.FindResidue(r.GetNumber())
889 if bs_r.IsValid():
890 cut_mdl_seq[i] = col[1]
891
892 cut_ref_seq = ''.join(cut_ref_seq)
893 cut_mdl_seq = ''.join(cut_mdl_seq)
894 cut_aln = seq.CreateAlignment()
895 cut_aln.AddSequence(seq.CreateSequence(ref_ch, cut_ref_seq))
896 cut_aln.AddSequence(seq.CreateSequence(mdl_ch, cut_mdl_seq))
897 cut_ref_mdl_alns[(ref_ch, mdl_ch)] = cut_aln
898 return cut_ref_mdl_alns
899
900 @property
902 """ Stores mappable atoms given a chain mapping
903
904 Store for each ref_ch,mdl_ch pair all mdl atoms that can be
905 mapped. Don't store mappable atoms as hashes but rather as tuple
906 (mdl_r.GetNumber(), mdl_a.GetName()). Reason for that is that one might
907 operate on Copied EntityHandle objects without corresponding hashes.
908 Given a tuple defining c_pair: (ref_cname, mdl_cname), one
909 can check if a certain atom is mappable by evaluating:
910 if (mdl_r.GetNumber(), mdl_a.GetName()) in self._mappable_atoms(c_pair)
911 """
912 if self.__mappable_atoms is None:
913 self.__mappable_atoms = dict()
914 for (ref_cname, mdl_cname), aln in self._ref_mdl_alns.items():
915 self._mappable_atoms[(ref_cname, mdl_cname)] = set()
916 ref_query = f"cname={mol.QueryQuoteName(ref_cname)}"
917 mdl_query = f"cname={mol.QueryQuoteName(mdl_cname)}"
918 ref_ch = self._chain_mapper.target.Select(ref_query)
919 mdl_ch = self._chain_mapping_mdl.Select(mdl_query)
920 aln.AttachView(0, ref_ch)
921 aln.AttachView(1, mdl_ch)
922 for col in aln:
923 ref_r = col.GetResidue(0)
924 mdl_r = col.GetResidue(1)
925 if ref_r.IsValid() and mdl_r.IsValid():
926 for mdl_a in mdl_r.atoms:
927 if ref_r.FindAtom(mdl_a.name).IsValid():
928 c_key = (ref_cname, mdl_cname)
929 at_key = (mdl_r.GetNumber(), mdl_a.name)
930 self.__mappable_atoms[c_key].add(at_key)
931
932 return self.__mappable_atoms
933
934# specify public interface
935__all__ = ('LDDTPLIScorer',)
_lddt_pli_unmapped_chain_penalty(self, unmapped_chains, non_mapped_cache, mdl_bs, mdl_ligand_res, mdl_sym)
__init__(self, model, target, model_ligands, target_ligands, resnum_alignments=False, rename_ligand_chain=False, substructure_match=False, coverage_delta=0.2, max_symmetries=1e4, lddt_pli_radius=6.0, add_mdl_contacts=True, lddt_pli_thresholds=[0.5, 1.0, 2.0, 4.0], lddt_pli_binding_site_radius=None, min_pep_length=6, min_nuc_length=4, pep_seqid_thr=95., nuc_seqid_thr=95., mdl_map_pep_seqid_thr=0., mdl_map_nuc_seqid_thr=0., seqres=None, trg_seqres_mapping=None)
_compute(self, symmetries, target_ligand, model_ligand)
_compute_lddt_pli_add_mdl_contacts(self, symmetries, target_ligand, model_ligand)
_compute_lddt_pli_classic(self, symmetries, target_ligand, model_ligand)
_lddt_pli_cut_ref_mdl_alns(self, chem_groups, chem_mapping, mdl_bs, ref_bs)
Real DLLEXPORT_OST_GEOM Distance(const Line2 &l, const Vec2 &v)