107 def __init__(self, model, target, model_ligands, target_ligands,
108 resnum_alignments=False, rename_ligand_chain=False,
109 substructure_match=False, coverage_delta=0.2,
110 max_symmetries=1e4, lddt_pli_radius=6.0,
111 add_mdl_contacts=True,
112 lddt_pli_thresholds = [0.5, 1.0, 2.0, 4.0],
113 lddt_pli_binding_site_radius=None,
115 min_nuc_length = 4, pep_seqid_thr = 95.,
117 mdl_map_pep_seqid_thr = 0.,
118 mdl_map_nuc_seqid_thr = 0.,
120 trg_seqres_mapping=None):
122 super().
__init__(model, target, model_ligands, target_ligands,
123 resnum_alignments = resnum_alignments,
124 rename_ligand_chain = rename_ligand_chain,
125 substructure_match = substructure_match,
126 coverage_delta = coverage_delta,
127 max_symmetries = max_symmetries,
128 min_pep_length = min_pep_length,
129 min_nuc_length = min_nuc_length,
130 pep_seqid_thr = pep_seqid_thr,
131 nuc_seqid_thr = nuc_seqid_thr,
132 mdl_map_pep_seqid_thr = mdl_map_pep_seqid_thr,
133 mdl_map_nuc_seqid_thr = mdl_map_nuc_seqid_thr,
135 trg_seqres_mapping = trg_seqres_mapping)
149 "There were no LDDT contacts between the "
150 "binding site and the ligand, and LDDT-PLI "
153 "Unknown error occured in LDDTPLIScorer")
199 trg_residues, trg_bs, trg_chains, trg_ligand_chain, \
200 trg_ligand_res, scorer, chem_groups = \
203 trg_bs_center = trg_bs.geometric_center
209 ref_indices = [a.copy()
for a
in scorer.ref_indices_ic]
210 ref_distances = [a.copy()
for a
in scorer.ref_distances_ic]
214 ligand_start_idx = scorer.chain_start_indices[-1]
215 for at_idx
in range(ligand_start_idx):
216 mask = ref_indices[at_idx] >= ligand_start_idx
217 ref_indices[at_idx] = ref_indices[at_idx][mask]
218 ref_distances[at_idx] = ref_distances[at_idx][mask]
220 mdl_residues, mdl_bs, mdl_chains, mdl_ligand_chain, mdl_ligand_res, \
239 chain_mappings = list(chain_mapping._ChainMappings(chem_groups,
244 ligand_atom_mappings = [set()
for a
in mdl_ligand_res.atoms]
245 for (trg_sym, mdl_sym)
in symmetries:
246 for trg_i, mdl_i
in zip(trg_sym, mdl_sym):
247 ligand_atom_mappings[mdl_i].add(trg_i)
249 mdl_ligand_pos = np.zeros((mdl_ligand_res.GetAtomCount(), 3))
250 for a_idx, a
in enumerate(mdl_ligand_res.atoms):
252 mdl_ligand_pos[a_idx, 0] = p[0]
253 mdl_ligand_pos[a_idx, 1] = p[1]
254 mdl_ligand_pos[a_idx, 2] = p[2]
256 trg_ligand_pos = np.zeros((trg_ligand_res.GetAtomCount(), 3))
257 for a_idx, a
in enumerate(trg_ligand_res.atoms):
259 trg_ligand_pos[a_idx, 0] = p[0]
260 trg_ligand_pos[a_idx, 1] = p[1]
261 trg_ligand_pos[a_idx, 2] = p[2]
263 mdl_lig_hashes = [a.hash_code
for a
in mdl_ligand_res.atoms]
265 symmetric_atoms = np.asarray(sorted(list(scorer.symmetric_atoms)),
287 scoring_cache = list()
288 penalty_cache = list()
290 for mapping
in chain_mappings:
293 flat_mapping = dict()
294 for trg_chem_group, mdl_chem_group
in zip(chem_groups, mapping):
295 for a,b
in zip(trg_chem_group, mdl_chem_group):
296 if a
is not None and b
is not None:
301 bs_atom_mapping = dict()
302 for mdl_cname, ref_cname
in flat_mapping.items():
303 aln = cut_ref_mdl_alns[(ref_cname, mdl_cname)]
304 ref_ch = trg_bs.Select(f
"cname={mol.QueryQuoteName(ref_cname)}")
305 mdl_ch = mdl_bs.Select(f
"cname={mol.QueryQuoteName(mdl_cname)}")
306 aln.AttachView(0, ref_ch)
307 aln.AttachView(1, mdl_ch)
309 ref_r = col.GetResidue(0)
310 mdl_r = col.GetResidue(1)
311 if ref_r.IsValid()
and mdl_r.IsValid():
312 for mdl_a
in mdl_r.atoms:
313 ref_a = ref_r.FindAtom(mdl_a.GetName())
315 ref_h = ref_a.handle.hash_code
316 if ref_h
in scorer.atom_indices:
317 mdl_h = mdl_a.handle.hash_code
318 bs_atom_mapping[mdl_h] = \
319 scorer.atom_indices[ref_h]
324 for mdl_a_idx, mdl_a
in enumerate(mdl_ligand_res.atoms):
326 trg_bs_indices = list()
327 close_a = mdl_bs.FindWithin(mdl_a.GetPos(),
330 mdl_a_hash_code = a.hash_code
331 if mdl_a_hash_code
in bs_atom_mapping:
332 trg_bs_indices.append(bs_atom_mapping[mdl_a_hash_code])
333 elif mdl_a_hash_code
not in mdl_lig_hashes:
334 if a.GetChain().GetName()
in flat_mapping:
336 at_key = (a.GetResidue().GetNumber(), a.name)
337 cname = a.GetChain().name
338 cname_key = (flat_mapping[cname], cname)
347 n_penalties.append(n_penalty)
349 trg_bs_indices = np.asarray(sorted(trg_bs_indices))
351 for trg_a_idx
in ligand_atom_mappings[mdl_a_idx]:
355 mask = np.isin(trg_bs_indices,
356 ref_indices[ligand_start_idx + trg_a_idx],
357 assume_unique=
True, invert=
True)
358 added_indices = np.asarray([], dtype=np.int64)
359 added_distances = np.asarray([], dtype=np.float64)
362 added_indices = trg_bs_indices[mask]
363 tmp = scorer.positions.take(added_indices, axis=0)
364 np.subtract(tmp, trg_ligand_pos[trg_a_idx][
None, :],
366 np.square(tmp, out=tmp)
367 tmp = tmp.sum(axis=1)
368 np.sqrt(tmp, out=tmp)
369 added_distances = tmp
372 sym_mask = np.isin(added_indices, symmetric_atoms,
375 cache[(mdl_a_idx, trg_a_idx)] = (added_indices,
377 added_indices[sym_mask],
378 added_distances[sym_mask])
380 scoring_cache.append(cache)
381 penalty_cache.append(n_penalties)
389 non_mapped_cache = dict()
396 best_result = {
"lddt_pli":
None,
397 "lddt_pli_n_contacts": 0,
398 "chain_mapping":
None}
401 ligand_aln = seq.CreateAlignment()
402 trg_s = seq.CreateSequence(trg_ligand_chain.name,
403 trg_ligand_res.GetOneLetterCode())
404 mdl_s = seq.CreateSequence(mdl_ligand_chain.name,
405 mdl_ligand_res.GetOneLetterCode())
406 ligand_aln.AddSequence(trg_s)
407 ligand_aln.AddSequence(mdl_s)
408 ligand_at_indices = list(range(ligand_start_idx, scorer.n_atoms))
410 sym_idx_collector = [
None] * scorer.n_atoms
411 sym_dist_collector = [
None] * scorer.n_atoms
413 for mapping, s_cache, p_cache
in zip(chain_mappings, scoring_cache,
416 lddt_chain_mapping = dict()
418 for ref_chem_group, mdl_chem_group
in zip(chem_groups, mapping):
419 for ref_ch, mdl_ch
in zip(ref_chem_group, mdl_chem_group):
421 if mdl_ch
is not None:
422 lddt_chain_mapping[mdl_ch] = ref_ch
423 lddt_alns[mdl_ch] = cut_ref_mdl_alns[(ref_ch, mdl_ch)]
426 lddt_chain_mapping[mdl_ligand_chain.name] = trg_ligand_chain.name
427 lddt_alns[mdl_ligand_chain.name] = ligand_aln
431 pos, _, _, _, _, _, lddt_symmetries = \
432 scorer._ProcessModel(mdl_bs, lddt_chain_mapping,
433 residue_mapping = lddt_alns,
435 check_resnames =
False)
445 unmapped_chains = list()
446 already_mapped = set()
447 for mdl_ch
in mdl_chains:
448 if mdl_ch
not in lddt_chain_mapping:
460 if chem_grp_idx
is None:
461 raise RuntimeError(
"This should never happen... "
466 if trg_ch
not in lddt_chain_mapping.values():
467 if trg_ch
not in already_mapped:
469 c = ch.geometric_center
471 if closest_dist
is None or d < closest_dist:
474 if closest_ch
is not None:
475 unmapped_chains.append((closest_ch, mdl_ch))
476 already_mapped.add(closest_ch)
478 for (trg_sym, mdl_sym)
in symmetries:
481 for mdl_i, trg_i
in zip(mdl_sym, trg_sym):
482 pos[ligand_start_idx + trg_i, :] = mdl_ligand_pos[mdl_i, :]
485 funky_ref_indices = [np.copy(a)
for a
in ref_indices]
486 funky_ref_distances = [np.copy(a)
for a
in ref_distances]
493 for idx
in symmetric_atoms:
494 sym_idx_collector[idx] = list()
495 sym_dist_collector[idx] = list()
499 for mdl_i, trg_i
in zip(mdl_sym, trg_sym):
500 added_penalty += p_cache[mdl_i]
501 cache = s_cache[mdl_i, trg_i]
502 full_trg_i = ligand_start_idx + trg_i
503 funky_ref_indices[full_trg_i] = \
504 np.append(funky_ref_indices[full_trg_i], cache[0])
505 funky_ref_distances[full_trg_i] = \
506 np.append(funky_ref_distances[full_trg_i], cache[1])
507 for idx, d
in zip(cache[2], cache[3]):
508 sym_idx_collector[idx].append(full_trg_i)
509 sym_dist_collector[idx].append(d)
511 for idx
in symmetric_atoms:
512 funky_ref_indices[idx] = \
513 np.append(funky_ref_indices[idx],
514 np.asarray(sym_idx_collector[idx],
516 funky_ref_distances[idx] = \
517 np.append(funky_ref_distances[idx],
518 np.asarray(sym_dist_collector[idx],
530 N = sum([len(funky_ref_indices[i])
for i
in ligand_at_indices])
534 if len(unmapped_chains) > 0:
541 conserved = np.sum(scorer._EvalAtoms(pos, ligand_at_indices,
544 funky_ref_distances),
548 score = np.mean(conserved/N)
550 if score
is not None and score > best_score:
552 save_chain_mapping = dict(lddt_chain_mapping)
553 del save_chain_mapping[mdl_ligand_chain.name]
554 best_result = {
"lddt_pli": score,
555 "lddt_pli_n_contacts": N,
556 "chain_mapping": save_chain_mapping}
559 best_result[
"target_ligand"] = target_ligand
560 best_result[
"model_ligand"] = model_ligand
576 trg_residues, trg_bs, trg_chains, trg_ligand_chain, \
577 trg_ligand_res, scorer, chem_groups = \
584 ref_indices = [a.copy()
for a
in scorer.ref_indices_ic]
585 ref_distances = [a.copy()
for a
in scorer.ref_distances_ic]
589 ligand_start_idx = scorer.chain_start_indices[-1]
590 ligand_at_indices = list(range(ligand_start_idx, scorer.n_atoms))
591 n_exp = sum([len(ref_indices[i])
for i
in ligand_at_indices])
593 mdl_residues, mdl_bs, mdl_chains, mdl_ligand_chain, mdl_ligand_res, \
598 return {
"lddt_pli":
None,
599 "lddt_pli_n_contacts": 0,
600 "chain_mapping":
None,
601 "target_ligand": target_ligand,
602 "model_ligand": model_ligand}
606 for at_idx
in range(ligand_start_idx):
607 mask = ref_indices[at_idx] >= ligand_start_idx
608 ref_indices[at_idx] = ref_indices[at_idx][mask]
609 ref_distances[at_idx] = ref_distances[at_idx][mask]
629 l_aln = seq.CreateAlignment()
630 l_aln.AddSequence(seq.CreateSequence(trg_ligand_chain.name,
631 trg_ligand_res.GetOneLetterCode()))
632 l_aln.AddSequence(seq.CreateSequence(mdl_ligand_chain.name,
633 mdl_ligand_res.GetOneLetterCode()))
635 mdl_ligand_pos = np.zeros((model_ligand.GetAtomCount(), 3))
636 for a_idx, a
in enumerate(model_ligand.atoms):
638 mdl_ligand_pos[a_idx, 0] = p[0]
639 mdl_ligand_pos[a_idx, 1] = p[1]
640 mdl_ligand_pos[a_idx, 2] = p[2]
642 for mapping
in chain_mapping._ChainMappings(chem_groups, chem_mapping):
644 lddt_chain_mapping = dict()
646 for ref_chem_group, mdl_chem_group
in zip(chem_groups, mapping):
647 for ref_ch, mdl_ch
in zip(ref_chem_group, mdl_chem_group):
649 if mdl_ch
is not None:
650 lddt_chain_mapping[mdl_ch] = ref_ch
651 lddt_alns[mdl_ch] = cut_ref_mdl_alns[(ref_ch, mdl_ch)]
654 lddt_chain_mapping[mdl_ligand_chain.name] = trg_ligand_chain.name
655 lddt_alns[mdl_ligand_chain.name] = l_aln
659 pos, _, _, _, _, _, lddt_symmetries = \
660 scorer._ProcessModel(mdl_bs, lddt_chain_mapping,
661 residue_mapping = lddt_alns,
663 check_resnames =
False)
665 for (trg_sym, mdl_sym)
in symmetries:
666 for mdl_i, trg_i
in zip(mdl_sym, trg_sym):
667 pos[ligand_start_idx + trg_i, :] = mdl_ligand_pos[mdl_i, :]
677 conserved = np.sum(scorer._EvalAtoms(pos, ligand_at_indices,
680 ref_distances), axis=0)
681 score = np.mean(conserved/n_exp)
683 if score > best_score:
685 save_chain_mapping = dict(lddt_chain_mapping)
686 del save_chain_mapping[mdl_ligand_chain.name]
687 best_result = {
"lddt_pli": score,
688 "chain_mapping": save_chain_mapping}
691 best_result[
"lddt_pli_n_contacts"] = n_exp
692 best_result[
"target_ligand"] = target_ligand
693 best_result[
"model_ligand"] = model_ligand