OpenStructure
Loading...
Searching...
No Matches
ligand_scoring_base.py
Go to the documentation of this file.
1from contextlib import contextmanager
2import numpy as np
3import networkx
4
5import ost
6from ost import mol
7from ost import conop
8from ost import LogWarning, LogScript, LogInfo, LogVerbose, LogDebug, GetVerbosityLevel, PushVerbosityLevel, PopVerbosityLevel
9from ost.mol.alg import chain_mapping
10
11@contextmanager
13 """ Context manager to temporarily reduce the verbosity level by n.
14
15 Example usage:
16 with _SinkVerbosityLevel(2):
17 LogVerbose("Test")
18 Will only write "Test" in script level (2 above)
19 """
20 PushVerbosityLevel(GetVerbosityLevel() - n)
21 try:
22 yield
23 except:
24 raise
25 finally:
26 PopVerbosityLevel()
27
28
30 """Return a parsable string of the atom in the format:
31 ChainName.ResidueNumber.InsertionCode.AtomName."""
32 resnum = a.residue.number
33 return "{cname}.{rnum}.{ins_code}.{aname}".format(
34 cname=a.chain.name,
35 rnum=resnum.num,
36 ins_code=resnum.ins_code.strip("\u0000"),
37 aname=a.name,
38 )
39
40
42 """Return a parsable string of the residue in the format:
43 ChainName.ResidueNumber.InsertionCode."""
44 resnum = r.number
45 return "{cname}.{rnum}.{ins_code}".format(
46 cname=r.chain.name,
47 rnum=resnum.num,
48 ins_code=resnum.ins_code.strip("\u0000"),
49 )
50
52 """ Scorer to compute various small molecule ligand (non polymer) scores.
53
54 :class:`LigandScorer` is an abstract base class dealing with all the setup,
55 data storage, enumerating ligand symmetries and target/model ligand
56 matching/assignment. But actual score computation is delegated to child
57 classes.
58
59 At the moment, two such classes are available:
60
61 * :class:`ost.mol.alg.ligand_scoring_lddtpli.LDDTPLIScorer`
62 that assesses the conservation of protein-ligand
63 contacts (LDDT-PLI);
64 * :class:`ost.mol.alg.ligand_scoring_scrmsd.SCRMSDScorer`
65 that computes a binding-site superposed, symmetry-corrected RMSD
66 (BiSyRMSD) and ligand pocket LDDT (LDDT-LP).
67
68 All versus all scores are available through the lazily computed
69 :attr:`score_matrix`. However, many things can go wrong... be it even
70 something as simple as two ligands not matching. Error states therefore
71 encode scoring issues. An Issue for a particular ligand is indicated by a
72 non-zero state in :attr:`model_ligand_states`/:attr:`target_ligand_states`.
73 This invalidates pairwise scores of such a ligand with all other ligands.
74 This and other issues in pairwise score computation are reported in
75 :attr:`state_matrix` which has the same size as :attr:`score_matrix`.
76 Only if the respective location is 0, a valid pairwise score can be
77 expected. The states and their meaning can be explored with code::
78
79 for state_code, (short_desc, desc) in scorer_obj.state_decoding.items():
80 print(state_code)
81 print(short_desc)
82 print(desc)
83
84 A common use case is to derive a one-to-one mapping between ligands in
85 the model and the target for which :class:`LigandScorer` provides an
86 automated :attr:`assignment` procedure.
87 By default, only exact matches between target and model ligands are
88 considered. This is a problem when the target only contains a subset
89 of the expected atoms (for instance if atoms are missing in an
90 experimental structure, which often happens in the PDB). With
91 `substructure_match=True`, complete model ligands can be scored against
92 partial target ligands. One problem with this approach is that it is
93 very easy to find good matches to small, irrelevant ligands like EDO, CO2
94 or GOL. The assignment algorithm therefore considers the coverage,
95 expressed as the fraction of atoms of the model ligand atoms covered in the
96 target. Higher coverage matches are prioritized, but a match with a better
97 score will be preferred if it falls within a window of `coverage_delta`
98 (by default 0.2) of a worse-scoring match. As a result, for instance,
99 with a delta of 0.2, a low-score match with coverage 0.96 would be
100 preferred over a high-score match with coverage 0.70.
101
102 Assumptions:
103
104 Unlike most of OpenStructure, this class does not assume that the ligands
105 (either for the model or the target) are part of the PDB component
106 dictionary. They may have arbitrary residue names. Residue names do not
107 have to match between the model and the target. Matching is based on
108 the calculation of isomorphisms which depend on the atom element name and
109 atom connectivity (bond order is ignored).
110 It is up to the caller to ensure that the connectivity of atoms is properly
111 set before passing any ligands to this class. Ligands with improper
112 connectivity will lead to bogus results.
113
114 This only applies to the ligand. The rest of the model and target
115 structures (protein, nucleic acids) must still follow the usual rules and
116 contain only residues from the compound library. Structures are cleaned up
117 according to constructor documentation. We advise to
118 use the :func:`ost.mol.alg.scoring_base.MMCIFPrep` and
119 :func:`ost.mol.alg.scoring_base.PDBPrep` for loading which already
120 clean hydrogens and, in the case of MMCIF, optionally extract ligands ready
121 to be used by the :class:`LigandScorer` based on "non-polymer" entity types.
122 In case of PDB file format, ligands must be loaded separately as SDF files.
123
124 Only polymers (protein and nucleic acids) of model and target are considered
125 for ligand binding sites. The
126 :class:`ost.mol.alg.chain_mapping.ChainMapper` is used to enumerate possible
127 mappings of these chains. In short: identical chains in the target are
128 grouped based on pairwise sequence identity
129 (see pep_seqid_thr/nuc_seqid_thr param). Each model chain is assigned to
130 one of these groups (see mdl_map_pep_seqid_thr/mdl_map_nuc_seqid_thr param).
131 To avoid spurious matches, only polymers of a certain length are considered
132 in this matching procedure (see min_pep_length/min_nuc_length param).
133 Shorter polymers are never mapped and do not contribute to scoring.
134
135 Here is an example of how to setup a scorer::
136
137 from ost.mol.alg.ligand_scoring_scrmsd import SCRMSDScorer
138 from ost.mol.alg.scoring_base import MMCIFPrep
139 from ost.mol.alg.scoring_base import PDBPrep
140
141 # Load data
142 # Structure model in PDB format, containing the receptor only
143 model = PDBPrep("path_to_model.pdb")
144 # Ligand model as SDF file
145 model_ligand = io.LoadEntity("path_to_ligand.sdf", format="sdf")
146 # Target loaded from mmCIF, containing the ligand
147 target, target_ligands = MMCIFPrep("path_to_target.cif",
148 extract_nonpoly=True)
149
150 # Setup scorer object and compute SCRMSD
151 model_ligands = [model_ligand.Select("ele != H")]
152 sc = SCRMSDScorer(model, target, model_ligands, target_ligands)
153
154 # Perform assignment and read respective scores
155 for lig_pair in sc.assignment:
156 trg_lig = sc.target_ligands[lig_pair[0]]
157 mdl_lig = sc.model_ligands[lig_pair[1]]
158 score = sc.score_matrix[lig_pair[0], lig_pair[1]]
159 print(f"Score for {trg_lig} and {mdl_lig}: {score}")
160
161 # check cleanup in model and target structure:
162 print("model cleanup:", sc.model_cleanup_log)
163 print("target cleanup:", sc.target_cleanup_log)
164
165
166 :param model: Model structure - a deep copy is available as :attr:`model`.
167 The model undergoes the following cleanup steps which are
168 dependent on :class:`ost.conop.CompoundLib` returned by
169 :func:`ost.conop.GetDefaultLib`: 1) removal
170 of hydrogens, 2) removal of residues for which there is no
171 entry in :class:`ost.conop.CompoundLib`, 3) removal of
172 residues that are not peptide linking or nucleotide linking
173 according to :class:`ost.conop.CompoundLib` 4) removal of
174 atoms that are not defined for respective residues in
175 :class:`ost.conop.CompoundLib`. Except step 1), every cleanup
176 is logged with :class:`ost.LogLevel` Warning and a report is
177 available as :attr:`model_cleanup_log`.
178 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
179 :param target: Target structure - same processing as *model*.
180 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
181 :param model_ligands: Model ligands, as a list of
182 :class:`ost.mol.ResidueHandle`/
183 :class:`ost.mol.ResidueView`/
184 :class:`ost.mol.EntityHandle`/
185 :class:`ost.mol.EntityView`. For
186 :class:`ost.mol.EntityHandle`/
187 :class:`ost.mol.EntityView`, each residue is
188 considered to be an individual ligand.
189 All ligands are copied into a separate
190 :class:`ost.mol.EntityHandle` available as
191 :attr:`model_ligand_ent` and the respective
192 list of ligands is available as :attr:`model_ligands`.
193 :type model_ligands: :class:`list`
194 :param target_ligands: Target ligands, same processing as model ligands.
195 :type target_ligands: :class:`list`
196 :param resnum_alignments: Whether alignments between chemically equivalent
197 chains in *model* and *target* can be computed
198 based on residue numbers. This can be assumed in
199 benchmarking setups such as CAMEO/CASP.
200 :type resnum_alignments: :class:`bool`
201 :param substructure_match: Set this to True to allow incomplete (i.e.
202 partially resolved) target ligands.
203 :type substructure_match: :class:`bool`
204 :param coverage_delta: the coverage delta for partial ligand assignment.
205 :type coverage_delta: :class:`float`
206 :param max_symmetries: If more than that many isomorphisms exist for
207 a target-ligand pair, it will be ignored and reported
208 as unassigned.
209 :type max_symmetries: :class:`int`
210 :param min_pep_length: Relevant parameter if short peptides are involved in
211 the polymer binding site. Minimum peptide length for
212 a chain to be considered in chain mapping.
213 The chain mapping algorithm first performs an all vs.
214 all pairwise sequence alignment to identify \"equal\"
215 chains within the target structure. We go for simple
216 sequence identity there. Short sequences can be
217 problematic as they may produce high sequence identity
218 alignments by pure chance.
219 :type min_pep_length: :class:`int`
220 :param min_nuc_length: Same for nucleotides
221 :type min_nuc_length: :class:`int`
222 :param pep_seqid_thr: Parameter that affects identification of identical
223 chains in target - see
224 :class:`ost.mol.alg.chain_mapping.ChainMapper`
225 :type pep_seqid_thr: :class:`float`
226 :param nuc_seqid_thr: Parameter that affects identification of identical
227 chains in target - see
228 :class:`ost.mol.alg.chain_mapping.ChainMapper`
229 :type nuc_seqid_thr: :class:`float`
230 :param mdl_map_pep_seqid_thr: Parameter that affects mapping of model chains
231 to target chains - see
232 :class:`ost.mol.alg.chain_mapping.ChainMapper`
233 :type mdl_map_pep_seqid_thr: :class:`float`
234 :param mdl_map_nuc_seqid_thr: Parameter that affects mapping of model chains
235 to target chains - see
236 :class:`ost.mol.alg.chain_mapping.ChainMapper`
237 :type mdl_map_nuc_seqid_thr: :class:`float`
238 :param seqres: Parameter that affects identification of identical chains in
239 target - see :class:`ost.mol.alg.chain_mapping.ChainMapper`
240 :type seqres: :class:`ost.seq.SequenceList`
241 :param trg_seqres_mapping: Parameter that affects identification of identical
242 chains in target - see
243 :class:`ost.mol.alg.chain_mapping.ChainMapper`
244 :type trg_seqres_mapping: :class:`dict`
245 """
246
247 def __init__(self, model, target, model_ligands, target_ligands,
248 resnum_alignments=False, substructure_match=False,
249 coverage_delta=0.2, max_symmetries=1e5,
250 rename_ligand_chain=False, min_pep_length = 6,
251 min_nuc_length = 4, pep_seqid_thr = 95.,
252 nuc_seqid_thr = 95.,
253 mdl_map_pep_seqid_thr = 0.,
254 mdl_map_nuc_seqid_thr = 0.,
255 seqres = None,
256 trg_seqres_mapping = None):
257
258 if isinstance(model, mol.EntityView):
259 self._model = mol.CreateEntityFromView(model, False)
260 elif isinstance(model, mol.EntityHandle):
261 self._model = model.Copy()
262 else:
263 raise RuntimeError("model must be of type EntityView/EntityHandle")
264
265 if isinstance(target, mol.EntityView):
266 self._target = mol.CreateEntityFromView(target, False)
267 elif isinstance(target, mol.EntityHandle):
268 self._target = target.Copy()
269 else:
270 raise RuntimeError("target must be of type EntityView/EntityHandle")
271
272 clib = conop.GetDefaultLib()
273 if not clib:
274 ost.LogError("A compound library is required. "
275 "Please refer to the OpenStructure website: "
276 "https://openstructure.org/docs/conop/compoundlib/.")
277 raise RuntimeError("No compound library found")
279 self._cleanup_polymer_ent(self._target, clib)
281 self._cleanup_polymer_ent(self._model, clib)
282
283 # keep ligands separate from polymer entities
284 self._target_ligand_ent = mol.CreateEntity()
285 self._model_ligand_ent = mol.CreateEntity()
286 target_ligand_ent_ed = self._target_ligand_ent.EditXCS(mol.BUFFERED_EDIT)
287 model_ligand_ent_ed = self._model_ligand_ent.EditXCS(mol.BUFFERED_EDIT)
288
289 self._target_ligands = list()
290 for l in target_ligands:
291 if isinstance(l, mol.EntityView) or isinstance(l, mol.EntityHandle):
292 for r in l.residues:
293 self._target_ligands.append(self._copy_ligand(r, self._target_ligand_ent,
294 target_ligand_ent_ed,
295 rename_ligand_chain))
296 elif isinstance(l, mol.ResidueHandle) or isinstance(l, mol.ResidueView):
297 self._target_ligands.append(self._copy_ligand(l, self._target_ligand_ent,
298 target_ligand_ent_ed,
299 rename_ligand_chain))
300 else:
301 raise RuntimeError("ligands must be of type EntityView/"
302 "EntityHandle/ResidueView/ResidueHandle")
303
304 self._model_ligands = list()
305 for l in model_ligands:
306 if isinstance(l, mol.EntityView) or isinstance(l, mol.EntityHandle):
307 for r in l.residues:
308 self._model_ligands.append(self._copy_ligand(r, self._model_ligand_ent,
309 model_ligand_ent_ed,
310 rename_ligand_chain))
311 elif isinstance(l, mol.ResidueHandle) or isinstance(l, mol.ResidueView):
312 self._model_ligands.append(self._copy_ligand(l, self._model_ligand_ent,
313 model_ligand_ent_ed,
314 rename_ligand_chain))
315 else:
316 raise RuntimeError("ligands must be of type EntityView/"
317 "EntityHandle/ResidueView/ResidueHandle")
318
319
320 if len(self.model_ligandsmodel_ligands) == 0:
321 LogWarning("No ligands in the model")
323 raise ValueError("No ligand in the model and in the target")
324
325 self._resnum_alignments = resnum_alignments
326 self._substructure_match = substructure_match
327 self._coverage_delta = coverage_delta
328 self._max_symmetries = max_symmetries
329 self._min_pep_length = min_pep_length
330 self._min_nuc_length = min_nuc_length
331 self._pep_seqid_thr = pep_seqid_thr
332 self._nuc_seqid_thr = nuc_seqid_thr
333 self._mdl_map_pep_seqid_thr = mdl_map_pep_seqid_thr
334 self._mdl_map_nuc_seqid_thr = mdl_map_nuc_seqid_thr
335 self._seqres = seqres
336 self._trg_seqres_mapping = trg_seqres_mapping
337
338 # lazily computed attributes
339 self.__chain_mapper = None
340 self.__chem_mapping = None
341 self.__chem_group_alns = None
342 self.__ref_mdl_alns = None
344 self.__chain_mapping_mdl = None
345
346 # keep track of states
347 # simple integers instead of enums - encoding available in
348 # self.state_decoding
349 self._state_matrix = None
352
353 # score matrices
354 self._score_matrix = None
356 self._aux_matrix = None
357
358 # assignment and derived data
359 self._assignment = None
360 self._score_dict = None
361 self._aux_dict = None
362
363 # human readable description of states - child class must extend with
364 # with child class specific states
365 # each state code comes with a tuple of two elements:
366 # 1) short description 2) human readable description
367 # The actual states are set in _compute_scores in :class:`LigandScorer`
368 # or _compute_score of the child class.
369 if self.substructure_match:
370 iso = "subgraph isomorphism"
371 else:
372 iso = "full graph isomorphism"
373
375 {0: ("OK", "OK"),
376 1: ("identity", f"Ligands could not be matched (by {iso})"),
377 2: ("symmetries", "Too many symmetries between ligand atoms were "
378 "found - increasing max_symmetries might help"),
379 3: ("no_iso", "No full isomorphic match could be found - enabling "
380 "substructure_match might allow a match"),
381 4: ("disconnected", "Ligand graph is disconnected"),
382 5: ("stoichiometry", "Ligand was already assigned to another ligand "
383 "(different stoichiometry)"),
384 6: ("single_ligand_issue", "Cannot compute valid pairwise score as "
385 "either model or target ligand have non-zero state."),
386 9: ("unknown", "An unknown error occured in LigandScorer")}
387
388
389 @property
390 def model(self):
391 """ Model receptor structure
392
393 Processed according to docs in :class:`LigandScorer` constructor
394 """
395 return self._model
396
397 @property
398 def target(self):
399 """ Target receptor structure
400
401 Processed according to docs in :class:`LigandScorer` constructor
402 """
403 return self._target
404
405 @property
407 """ Reports residues/atoms that were removed in model during cleanup
408
409 Residues and atoms are described as :class:`str` in format
410 <chain_name>.<resnum>.<ins_code> (residue) and
411 <chain_name>.<resnum>.<ins_code>.<aname> (atom).
412
413 :class:`dict` with keys:
414
415 * 'cleaned_residues': another :class:`dict` with keys:
416
417 * 'no_clib': residues that have been removed because no entry could be
418 found in :class:`ost.conop.CompoundLib`
419 * 'not_linking': residues that have been removed because they're not
420 peptide or nucleotide linking according to
421 :class:`ost.conop.CompoundLib`
422
423 * 'cleaned_atoms': another :class:`dict` with keys:
424
425 * 'unknown_atoms': atoms that have been removed as they're not part
426 of their respective residue according to
427 :class:`ost.conop.CompoundLib`
428 """
429 return self._model_cleanup_log
430
431 @property
433 """ Same for target
434 """
435 return self._target_cleanup_log
436
437 @property
438 def model_ligands(self):
439 """ Residues representing model ligands
440
441 :class:`list` of :class:`ost.mol.ResidueHandle`
442 """
443 return self._model_ligands
444
445 @property
446 def target_ligands(self):
447 """ Residues representing target ligands
448
449 :class:`list` of :class:`ost.mol.ResidueHandle`
450 """
451 return self._target_ligands
452
453 @property
455 """ Given at :class:`LigandScorer` construction
456 """
457 return self._resnum_alignments
458
459 @property
460 def min_pep_length(self):
461 """ Given at :class:`LigandScorer` construction
462 """
463 return self._min_pep_length
464
465 @property
466 def min_nuc_length(self):
467 """ Given at :class:`LigandScorer` construction
468 """
469 return self._min_nuc_length
470
471 @property
472 def pep_seqid_thr(self):
473 """ Given at :class:`LigandScorer` construction
474 """
475 return self._pep_seqid_thr
476
477 @property
478 def nuc_seqid_thr(self):
479 """ Given at :class:`LigandScorer` construction
480 """
481 return self._nuc_seqid_thr
482
483 @property
485 """ Given at :class:`LigandScorer` construction
486 """
487 return self._mdl_map_pep_seqid_thr
488
489 @property
491 """ Given at :class:`LigandScorer` construction
492 """
493 return self._mdl_map_nuc_seqid_thr
494
495 @property
496 def seqres(self):
497 """ Given at :class:`LigandScorer` construction
498 """
499 return self._seqres
500
501 @property
503 """ Given at :class:`LigandScorer` construction
504 """
505 return self._trg_seqres_mapping
506
507 @property
509 """ Given at :class:`LigandScorer` construction
510 """
511 return self._substructure_match
512
513 @property
514 def coverage_delta(self):
515 """ Given at :class:`LigandScorer` construction
516 """
517 return self._coverage_delta
518
519 @property
520 def max_symmetries(self):
521 """ Given at :class:`LigandScorer` construction
522 """
523 return self._max_symmetries
524
525 @property
526 def state_matrix(self):
527 """ Encodes states of ligand pairs
528
529 Ligand pairs can be matched and a valid score can be expected if
530 respective location in this matrix is 0.
531 Target ligands are in rows, model ligands in columns. States are encoded
532 as integers <= 9. Larger numbers encode errors for child classes.
533 Use something like ``self.state_decoding[3]`` to get a decscription.
534
535 :rtype: :class:`~numpy.ndarray`
536 """
537 if self._state_matrix is None:
538 self._compute_scores()
539 return self._state_matrix
540
541 @property
543 """ Encodes states of model ligands
544
545 Non-zero state in any of the model ligands invalidates the full
546 respective column in :attr:`~state_matrix`.
547
548 :rtype: :class:`~numpy.ndarray`
549 """
550 if self._model_ligand_states is None:
551 self._compute_scores()
552 return self._model_ligand_states
553
554 @property
556 """ Encodes states of target ligands
557
558 Non-zero state in any of the target ligands invalidates the full
559 respective row in :attr:`~state_matrix`.
560
561 :rtype: :class:`~numpy.ndarray`
562 """
563 if self._target_ligand_states is None:
564 self._compute_scores()
565 return self._target_ligand_states
566
567 @property
568 def score_matrix(self):
569 """ Get the matrix of scores.
570
571 Target ligands are in rows, model ligands in columns.
572
573 NaN values indicate that no value could be computed (i.e. different
574 ligands). In other words: values are only valid if the respective
575 location in :attr:`~state_matrix` is 0.
576
577 :rtype: :class:`~numpy.ndarray`
578 """
579 if self._score_matrix is None:
580 self._compute_scores()
581 return self._score_matrix
582
583 @property
585 """ Get the matrix of model ligand atom coverage in the target.
586
587 Target ligands are in rows, model ligands in columns.
588
589 NaN values indicate that no value could be computed (i.e. different
590 ligands). In other words: values are only valid if the respective
591 location in :attr:`~state_matrix` is 0. If `substructure_match=False`,
592 only full match isomorphisms are considered, and therefore only values
593 of 1.0 can be observed.
594
595 :rtype: :class:`~numpy.ndarray`
596 """
597 if self._coverage_matrix is None:
598 self._compute_scores()
599 return self._coverage_matrix
600
601 @property
602 def aux_matrix(self):
603 """ Get the matrix of scorer specific auxiliary data.
604
605 Target ligands are in rows, model ligands in columns.
606
607 Auxiliary data consists of arbitrary data dicts which allow a child
608 class to provide additional information for a scored ligand pair.
609 empty dictionaries indicate that the child class simply didn't return
610 anything or that no value could be computed (e.g. different ligands).
611 In other words: values are only valid if respective location in the
612 :attr:`~state_matrix` is 0.
613
614 :rtype: :class:`~numpy.ndarray`
615 """
616 if self._aux_matrix is None:
617 self._compute_scores()
618 return self._aux_matrix
619
620 @property
621 def assignment(self):
622 """ Ligand assignment based on computed scores
623
624 Implements a greedy algorithm to assign target and model ligands
625 with each other. Starts from each valid ligand pair as indicated
626 by a state of 0 in :attr:`state_matrix`. Each iteration first selects
627 high coverage pairs. Given max_coverage defined as the highest
628 coverage observed in the available pairs, all pairs with coverage
629 in [max_coverage-*coverage_delta*, max_coverage] are selected.
630 The best scoring pair among those is added to the assignment
631 and the whole process is repeated until there are no ligands to
632 assign anymore.
633
634 :rtype: :class:`list` of :class:`tuple` (trg_lig_idx, mdl_lig_idx)
635 """
636 if self._assignment is None:
637 self._assignment = list()
638 # Build working array that contains tuples for all mdl/trg ligand
639 # pairs with valid score as indicated by a state of 0:
640 # (score, coverage, trg_ligand_idx, mdl_ligand_idx)
641 tmp = list()
642 for trg_idx in range(self.score_matrix.shape[0]):
643 for mdl_idx in range(self.score_matrix.shape[1]):
644 if self.state_matrix[trg_idx, mdl_idx] == 0:
645 tmp.append((self.score_matrix[trg_idx, mdl_idx],
646 self.coverage_matrix[trg_idx, mdl_idx],
647 trg_idx, mdl_idx))
648
649 # sort by score, such that best scoring item is in front
650 if self._score_dir() == '+':
651 tmp.sort(reverse=True)
652 elif self._score_dir() == '-':
653 tmp.sort()
654 else:
655 raise RuntimeError("LigandScorer._score_dir must return one in "
656 "['+', '-']")
657
658 LogScript("Computing ligand assignment")
659 while len(tmp) > 0:
660 # select high coverage ligand pairs in working array
661 coverage_thresh = max([x[1] for x in tmp]) - self.coverage_delta
662 top_coverage = [x for x in tmp if x[1] >= coverage_thresh]
663
664 # working array is sorted by score => just pick first one
665 a = top_coverage[0][2] # selected trg_ligand_idx
666 b = top_coverage[0][3] # selected mdl_ligand_idx
667 self._assignment.append((a, b))
668
669 # kick out remaining pairs involving these ligands
670 tmp = [x for x in tmp if (x[2] != a and x[3] != b)]
671
672 return self._assignment
673
674 @property
675 def score(self):
676 """ Get a dictionary of score values, keyed by model ligand
677
678 Extract score with something like:
679 ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
680 The returned scores are based on :attr:`~assignment`.
681
682 :rtype: :class:`dict`
683 """
684 if self._score_dict is None:
685 self._score_dict = dict()
686 for (trg_lig_idx, mdl_lig_idx) in self.assignment:
687 mdl_lig = self.model_ligandsmodel_ligands[mdl_lig_idx]
688 cname = mdl_lig.GetChain().GetName()
689 rnum = mdl_lig.GetNumber()
690 if cname not in self._score_dict:
691 self._score_dict[cname] = dict()
692 score = self.score_matrix[trg_lig_idx, mdl_lig_idx]
693 self._score_dict[cname][rnum] = score
694 return self._score_dict
695
696 @property
697 def aux(self):
698 """ Get a dictionary of score details, keyed by model ligand
699
700 Extract dict with something like:
701 ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
702 The returned info dicts are based on :attr:`~assignment`. The content is
703 documented in the respective child class.
704
705 :rtype: :class:`dict`
706 """
707 if self._aux_dict is None:
708 self._aux_dict = dict()
709 for (trg_lig_idx, mdl_lig_idx) in self.assignment:
710 mdl_lig = self.model_ligandsmodel_ligands[mdl_lig_idx]
711 cname = mdl_lig.GetChain().GetName()
712 rnum = mdl_lig.GetNumber()
713 if cname not in self._aux_dict:
714 self._aux_dict[cname] = dict()
715 d = self.aux_matrix[trg_lig_idx, mdl_lig_idx]
716 self._aux_dict[cname][rnum] = d
717 return self._aux_dict
718
719
720 @property
722 """ Get indices of target ligands which are not assigned
723
724 :rtype: :class:`list` of :class:`int`
725 """
726 # compute on-the-fly, no need for caching
727 assigned = set([x[0] for x in self.assignment])
728 return [x for x in range(len(self.target_ligandstarget_ligands)) if x not in assigned]
729
730 @property
732 """ Get indices of model ligands which are not assigned
733
734 :rtype: :class:`list` of :class:`int`
735 """
736 # compute on-the-fly, no need for caching
737 assigned = set([x[1] for x in self.assignment])
738 return [x for x in range(len(self.model_ligandsmodel_ligands)) if x not in assigned]
739
740 def get_target_ligand_state_report(self, trg_lig_idx):
741 """ Get summary of states observed with respect to all model ligands
742
743 Mainly for debug purposes
744
745 :param trg_lig_idx: Index of target ligand for which report should be
746 generated
747 :type trg_lig_idx: :class:`int`
748 """
749 return self._get_report(self.target_ligand_states[trg_lig_idx],
750 self.state_matrix[trg_lig_idx,:])
751
752 def get_model_ligand_state_report(self, mdl_lig_idx):
753 """ Get summary of states observed with respect to all target ligands
754
755 Mainly for debug purposes
756
757 :param mdl_lig_idx: Index of model ligand for which report should be
758 generated
759 :type mdl_lig_idx: :class:`int`
760 """
761 return self._get_report(self.model_ligand_states[mdl_lig_idx],
762 self.state_matrix[:, mdl_lig_idx])
763
764 def _get_report(self, ligand_state, pair_states):
765 """ Helper
766 """
767 pair_report = list()
768 for s in np.unique(pair_states):
769 desc = self.state_decoding[s]
770 indices = np.flatnonzero(pair_states == s).tolist()
771 pair_report.append({"state": s,
772 "short desc": desc[0],
773 "desc": desc[1],
774 "indices": indices})
775
776 desc = self.state_decoding[ligand_state]
777 ligand_report = {"state": ligand_state,
778 "short desc": desc[0],
779 "desc": desc[1]}
780
781 return (ligand_report, pair_report)
782
784 """ Makes an educated guess why target ligand is not assigned
785
786 This either returns actual error states or custom states that are
787 derived from them. Currently, the following reasons are reported:
788
789 * `no_ligand`: there was no ligand in the model.
790 * `disconnected`: the ligand graph was disconnected.
791 * `identity`: the ligand was not found in the model (by graph
792 isomorphism). Check your ligand connectivity.
793 * `no_iso`: no full isomorphic match could be found. Try enabling
794 `substructure_match=True` if the target ligand is incomplete.
795 * `symmetries`: too many symmetries were found (by graph isomorphisms).
796 Try to increase `max_symmetries`.
797 * `stoichiometry`: there was a possible assignment in the model, but
798 the model ligand was already assigned to a different target ligand.
799 This indicates different stoichiometries.
800 * `no_contact` (LDDT-PLI only): There were no LDDT contacts between
801 the binding site and the ligand, and LDDT-PLI is undefined.
802 * `target_binding_site` (SCRMSD only): no polymer residues were in
803 proximity of the target ligand.
804 * `model_binding_site` (SCRMSD only): the binding site was not found
805 in the model. Either the binding site was not modeled or the model
806 ligand was positioned too far in combination with
807 `full_bs_search=False`.
808
809 :param trg_lig_idx: Index of target ligand
810 :type trg_lig_idx: :class:`int`
811 :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
812 sentence describing the issue, (\"unknown\",\"unknown\") if
813 nothing obvious can be found.
814 :raises: :class:`RuntimeError` if specified target ligand is assigned
815 """
816 if trg_lig_idx not in self.unassigned_target_ligands:
817 raise RuntimeError("Specified target ligand is not unassigned")
818
819 # hardcoded tuple if there is simply nothing we can assign it to
820 if len(self.model_ligandsmodel_ligands) == 0:
821 return ("no_ligand", "No ligand in the model")
822
823 # if something with the ligand itself is wrong, we can be pretty sure
824 # thats why the ligand is unassigned
825 if self.target_ligand_states[trg_lig_idx] != 0:
826 return self.state_decoding[self.target_ligand_states[trg_lig_idx]]
827
828 # The next best guess comes from looking at pair states
829 tmp = np.unique(self.state_matrix[trg_lig_idx,:])
830
831 # In case of any 0, it could have been assigned so it's probably
832 # just not selected due to different stoichiometry - this is no
833 # defined state, we just return a hardcoded tuple in this case
834 if 0 in tmp:
835 return ("stoichiometry",
836 "Ligand was already assigned to an other "
837 "model ligand (different stoichiometry)")
838
839 # maybe it's a symmetry issue?
840 if 2 in tmp:
841 return self.state_decoding[2]
842
843 # if the state is 6 (single_ligand_issue), there is an issue with its
844 # target counterpart.
845 if 6 in tmp:
846 mdl_idx = np.where(self.state_matrix[trg_lig_idx,:]==6)[0]
847 for i in mdl_idx:
848 if self.model_ligand_states[i] == 0:
849 raise RuntimeError("This should never happen")
850 if self.model_ligand_states[i] != 4 or len(tmp) == 1:
851 # Don't report disconnected if only 1 model ligand is
852 # disconnected, unless that's the only reason
853 return self.state_decoding[self.model_ligand_states[i]]
854
855 # get rid of remaining single ligand issues (only disconnected error)
856 if 6 in tmp and len(tmp) > 1:
857 tmp = tmp[tmp!=6]
858
859 # prefer everything over identity state
860 if 1 in tmp and len(tmp) > 1:
861 tmp = tmp[tmp!=1]
862
863 # just return whatever is left
864 return self.state_decoding[tmp[0]]
865
866
868 """ Makes an educated guess why model ligand is not assigned
869
870 This either returns actual error states or custom states that are
871 derived from them. Currently, the following reasons are reported:
872
873 * `no_ligand`: there was no ligand in the target.
874 * `disconnected`: the ligand graph is disconnected.
875 * `identity`: the ligand was not found in the target (by graph or
876 subgraph isomorphism). Check your ligand connectivity.
877 * `no_iso`: no full isomorphic match could be found. Try enabling
878 `substructure_match=True` if the target ligand is incomplete.
879 * `symmetries`: too many symmetries were found (by graph isomorphisms).
880 Try to increase `max_symmetries`.
881 * `stoichiometry`: there was a possible assignment in the target, but
882 the model target was already assigned to a different model ligand.
883 This indicates different stoichiometries.
884 * `no_contact` (LDDT-PLI only): There were no LDDT contacts between
885 the binding site and the ligand, and LDDT-PLI is undefined.
886 * `target_binding_site` (SCRMSD only): a potential assignment was found
887 in the target, but there were no polymer residues in proximity of the
888 ligand in the target.
889 * `model_binding_site` (SCRMSD only): a potential assignment was
890 found in the target, but no binding site was found in the model.
891 Either the binding site was not modeled or the model ligand was
892 positioned too far in combination with `full_bs_search=False`.
893
894 :param mdl_lig_idx: Index of model ligand
895 :type mdl_lig_idx: :class:`int`
896 :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
897 sentence describing the issue, (\"unknown\",\"unknown\") if
898 nothing obvious can be found.
899 :raises: :class:`RuntimeError` if specified model ligand is assigned
900 """
901 if mdl_lig_idx not in self.unassigned_model_ligands:
902 raise RuntimeError("Specified model ligand is not unassigned")
903
904 # hardcoded tuple if there is simply nothing we can assign it to
905 if len(self.target_ligandstarget_ligands) == 0:
906 return ("no_ligand", "No ligand in the target")
907
908 # if something with the ligand itself is wrong, we can be pretty sure
909 # thats why the ligand is unassigned
910 if self.model_ligand_states[mdl_lig_idx] != 0:
911 return self.state_decoding[self.model_ligand_states[mdl_lig_idx]]
912
913 # The next best guess comes from looking at pair states
914 tmp = np.unique(self.state_matrix[:,mdl_lig_idx])
915
916 # In case of any 0, it could have been assigned so it's probably
917 # just not selected due to different stoichiometry - this is no
918 # defined state, we just return a hardcoded tuple in this case
919 if 0 in tmp:
920 return ("stoichiometry",
921 "Ligand was already assigned to an other "
922 "model ligand (different stoichiometry)")
923
924 # maybe its a symmetry issue?
925 if 2 in tmp:
926 return self.state_decoding[2]
927
928 # if the state is 6 (single_ligand_issue), there is an issue with its
929 # target counterpart.
930 if 6 in tmp:
931 trg_idx = np.where(self.state_matrix[:,mdl_lig_idx]==6)[0]
932 for i in trg_idx:
933 if self.target_ligand_states[i] == 0:
934 raise RuntimeError("This should never happen")
935 if self.target_ligand_states[i] != 4 or len(tmp) == 1:
936 # Don't report disconnected if only 1 target ligand is
937 # disconnected, unless that's the only reason
938 return self.state_decoding[self.target_ligand_states[i]]
939
940 # get rid of remaining single ligand issues (only disconnected error)
941 if 6 in tmp and len(tmp) > 1:
942 tmp = tmp[tmp!=6]
943
944 # prefer everything over identity state
945 if 1 in tmp and len(tmp) > 1:
946 tmp = tmp[tmp!=1]
947
948 # just return whatever is left
949 return self.state_decoding[tmp[0]]
950
951 @property
953 return_dict = dict()
954 for i in self.unassigned_model_ligands:
955 lig = self.model_ligandsmodel_ligands[i]
956 cname = lig.GetChain().GetName()
957 rnum = lig.GetNumber()
958 if cname not in return_dict:
959 return_dict[cname] = dict()
960 return_dict[cname][rnum] = \
962 return return_dict
963
964 @property
966 return_dict = dict()
967 for i in self.unassigned_target_ligands:
968 lig = self.target_ligandstarget_ligands[i]
969 cname = lig.GetChain().GetName()
970 rnum = lig.GetNumber()
971 if cname not in return_dict:
972 return_dict[cname] = dict()
973 return_dict[cname][rnum] = \
975 return return_dict
976
977 @property
978 def _chain_mapper(self):
979 """ Chain mapper object for the given :attr:`target`.
980
981 Can be used by child classes if needed, constructed with
982 *resnum_alignments* flag
983
984 :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
985 """
986 if self.__chain_mapper is None:
987 with _SinkVerbosityLevel():
988 self.__chain_mapper = \
990 n_max_naive=1e9,
991 resnum_alignments=self.resnum_alignments,
992 min_pep_length=self.min_pep_length,
993 min_nuc_length=self.min_nuc_length,
994 pep_seqid_thr=self.pep_seqid_thr,
995 nuc_seqid_thr=self.nuc_seqid_thr,
996 mdl_map_pep_seqid_thr=self.mdl_map_pep_seqid_thr,
997 mdl_map_nuc_seqid_thr=self.mdl_map_nuc_seqid_thr,
998 seqres = self.seqres,
999 trg_seqres_mapping = self.trg_seqres_mapping)
1000 return self.__chain_mapper
1001
1002 @property
1003 def _chem_mapping(self):
1004 if self.__chem_mapping is None:
1005 self.__chem_mapping, self.__chem_group_alns, \
1007 self._chain_mapper.GetChemMapping(self.modelmodel)
1008 return self.__chem_mapping
1009
1010 @property
1012 if self.__chem_group_alns is None:
1013 self.__chem_mapping, self.__chem_group_alns, \
1015 self._chain_mapper.GetChemMapping(self.modelmodel)
1016 return self.__chem_group_alns
1017
1018 @property
1019 def _ref_mdl_alns(self):
1020 if self.__ref_mdl_alns is None:
1021 self.__ref_mdl_alns = \
1022 chain_mapping._GetRefMdlAlns(self._chain_mapper.chem_groups,
1023 self._chain_mapper.chem_group_alignments,
1026 return self.__ref_mdl_alns
1027
1028 @property
1030 if self.__chain_mapping_mdl is None:
1031 with _SinkVerbosityLevel():
1032 self.__chem_mapping, self.__chem_group_alns, \
1034 self._chain_mapper.GetChemMapping(self.modelmodel)
1035 return self.__chain_mapping_mdl
1036
1037 @property
1046 """
1047 Compute score for every possible target-model ligand pair and store the
1048 result in internal matrices.
1049 """
1050
1053 shape = (len(self.target_ligandstarget_ligands), len(self.model_ligandsmodel_ligands))
1054 self._score_matrix = np.full(shape, np.nan, dtype=np.float32)
1055 self._coverage_matrix = np.full(shape, np.nan, dtype=np.float32)
1056 self._state_matrix = np.full(shape, 0, dtype=np.int32)
1057 self._model_ligand_states = np.zeros(len(self.model_ligandsmodel_ligands))
1058 self._target_ligand_states = np.zeros(len(self.target_ligandstarget_ligands))
1059 self._aux_matrix = np.empty(shape, dtype=dict)
1060
1061 # precompute ligand graphs
1062 target_graphs = \
1063 [_ResidueToGraph(l, by_atom_index=True) for l in self.target_ligandstarget_ligands]
1064 model_graphs = \
1065 [_ResidueToGraph(l, by_atom_index=True) for l in self.model_ligandsmodel_ligands]
1066
1067 for g_idx, g in enumerate(target_graphs):
1068 if not networkx.is_connected(g):
1069 self._target_ligand_states[g_idx] = 4
1070 self._state_matrix[g_idx,:] = 6
1071 msg = "Disconnected graph observed for target ligand "
1072 msg += str(self.target_ligandstarget_ligands[g_idx])
1073 LogWarning(msg)
1074
1075 for g_idx, g in enumerate(model_graphs):
1076 if not networkx.is_connected(g):
1077 self._model_ligand_states[g_idx] = 4
1078 self._state_matrix[:,g_idx] = 6
1079 msg = "Disconnected graph observed for model ligand "
1080 msg += str(self.model_ligandsmodel_ligands[g_idx])
1081 LogWarning(msg)
1082
1083
1084 LogScript("Computing pairwise scores for all %s x %s ligands" % shape)
1085 for target_id, target_ligand in enumerate(self.target_ligandstarget_ligands):
1086 LogInfo("Analyzing target ligand %s" % target_ligand)
1087
1088 if self._target_ligand_states[target_id] == 4:
1089 # Disconnected graph - already updated states and reported
1090 # to LogVerbose
1091 continue
1092
1093 for model_id, model_ligand in enumerate(self.model_ligandsmodel_ligands):
1094 LogInfo("Comparing to model ligand %s" % model_ligand)
1095
1096
1099
1100 if self._model_ligand_states[model_id] == 4:
1101 # Disconnected graph - already updated states and reported
1102 # to LogVerbose
1103 continue
1104
1105 try:
1106 symmetries = ComputeSymmetries(
1107 model_ligand, target_ligand,
1108 substructure_match=self.substructure_match,
1109 by_atom_index=True,
1110 max_symmetries=self.max_symmetries,
1111 model_graph = model_graphs[model_id],
1112 target_graph = target_graphs[target_id])
1113 LogInfo("Ligands %s and %s symmetry match" % (
1114 str(model_ligand), str(target_ligand)))
1115 except NoSymmetryError:
1116 # Ligands are different - skip
1117 LogInfo("No symmetry between %s and %s" % (
1118 str(model_ligand), str(target_ligand)))
1119 self._state_matrix[target_id, model_id] = 1
1120 continue
1121 except TooManySymmetriesError:
1122 # Ligands are too symmetrical - skip
1123 LogWarning("Too many symmetries between %s and %s" % (
1124 str(model_ligand), str(target_ligand)))
1125 self._state_matrix[target_id, model_id] = 2
1126 continue
1127 except NoIsomorphicSymmetryError:
1128 # Ligands are different - skip
1129 LogInfo("No isomorphic symmetry between %s and %s" % (
1130 str(model_ligand), str(target_ligand)))
1131 self._state_matrix[target_id, model_id] = 3
1132 continue
1133 except DisconnectedGraphError:
1134 # this should never happen as we guard against
1135 # DisconnectedGraphError when precomputing the graph
1136 LogError("Disconnected graph observed for %s and %s" % (
1137 str(model_ligand), str(target_ligand)))
1138 # just set both ligand states to 4
1139 self._model_ligand_states[model_id] = 4
1140 self._model_ligand_states[target_id] = 4
1141 self._state_matrix[target_id, model_id] = 6
1142 continue
1143
1144
1147 score, pair_state, target_ligand_state, model_ligand_state, aux\
1148 = self._compute(symmetries, target_ligand, model_ligand)
1149
1150
1153
1154 # Ensure that returned states are associated with a
1155 # description. This is a requirement when subclassing
1156 # LigandScorer => state_decoding dict from base class must
1157 # be modified in subclass constructor
1158 if pair_state not in self.state_decoding or \
1159 target_ligand_state not in self.state_decoding or \
1160 model_ligand_state not in self.state_decoding:
1161 raise RuntimeError(f"Subclass returned state "
1162 f"\"{state}\" for which no "
1163 f"description is available. Point "
1164 f"the developer of the used scorer "
1165 f"to this error message.")
1166
1167 # if any of the ligand states is non-zero, this must be marked
1168 # by the scorer subclass by specifying a specific pair state
1169 if target_ligand_state != 0 and pair_state != 6:
1170 raise RuntimeError("Observed non-zero target-ligand "
1171 "state which must trigger certain "
1172 "pair state. Point the developer of "
1173 "the used scorer to this error message")
1174
1175 if model_ligand_state != 0 and pair_state != 6:
1176 raise RuntimeError("Observed non-zero model-ligand "
1177 "state which must trigger certain "
1178 "pair state. Point the developer of "
1179 "the used scorer to this error message")
1180
1181 self._state_matrix[target_id, model_id] = pair_state
1182 self._target_ligand_states[target_id] = target_ligand_state
1183 self._model_ligand_states[model_id] = model_ligand_state
1184 if pair_state == 0:
1185 if score is None or np.isnan(score):
1186 raise RuntimeError("LigandScorer returned invalid "
1187 "score despite 0 error state")
1188 # it's a valid score!
1189 self._score_matrix[target_id, model_id] = score
1190 cvg = len(symmetries[0][0]) / len(model_ligand.atoms)
1191 self._coverage_matrix[target_id, model_id] = cvg
1192 self._aux_matrix[target_id, model_id] = aux
1193
1194 def _compute(self, symmetries, target_ligand, model_ligand):
1195 """ Compute score for specified ligand pair - defined by child class
1196
1197 Raises :class:`NotImplementedError` if not implemented by child class.
1198
1199 :param symmetries: Defines symmetries between *target_ligand* and
1200 *model_ligand*. Return value of
1201 :func:`ComputeSymmetries`
1202 :type symmetries: :class:`list` of :class:`tuple` with two elements
1203 each: 1) :class:`list` of atom indices in
1204 *target_ligand* 2) :class:`list` of respective atom
1205 indices in *model_ligand*
1206 :param target_ligand: The target ligand
1207 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1208 :class:`ost.mol.ResidueView`
1209 :param model_ligand: The model ligand
1210 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1211 :class:`ost.mol.ResidueView`
1212
1213 :returns: A :class:`tuple` with three elements: 1) a score
1214 (:class:`float`) 2) state (:class:`int`).
1215 3) auxiliary data for this ligand pair (:class:`dict`).
1216 If state is 0, the score and auxiliary data will be
1217 added to :attr:`~score_matrix` and :attr:`~aux_matrix` as well
1218 as the respective value in :attr:`~coverage_matrix`.
1219 Returned score must be valid in this case (not None/NaN).
1220 Child specific non-zero states must be >= 10.
1221 """
1222 raise NotImplementedError("_compute must be implemented by child "
1223 "class")
1224
1225 def _score_dir(self):
1226 """ Return direction of score - defined by child class
1227
1228 Relevant for ligand assignment. Must return a string in ['+', '-'].
1229 '+' for ascending scores, i.e. higher is better (lddt etc.)
1230 '-' for descending scores, i.e. lower is better (rmsd etc.)
1231 """
1232 raise NotImplementedError("_score_dir must be implemented by child "
1233 "class")
1234
1235 def _copy_ligand(self, l, ent, ed, rename_ligand_chain):
1236 """ Copies ligand into entity and returns residue handle
1237 """
1238 new_chain = ent.FindChain(l.chain.name)
1239 if not new_chain.IsValid():
1240 new_chain = ed.InsertChain(l.chain.name)
1241 ed.SetChainType(new_chain, mol.ChainType.CHAINTYPE_NON_POLY)
1242 else:
1243 # Does a residue with the same name already exist?
1244 already_exists = new_chain.FindResidue(l.number).IsValid()
1245 if already_exists:
1246 if rename_ligand_chain:
1247 chain_ext = 2 # Extend the chain name by this
1248 while True:
1249 new_chain_name = \
1250 l.chain.name + "_" + str(chain_ext)
1251 new_chain = ent.FindChain(new_chain_name)
1252 if new_chain.IsValid():
1253 chain_ext += 1
1254 continue
1255 else:
1256 new_chain = \
1257 ed.InsertChain(new_chain_name)
1258 break
1259 LogInfo("Moved ligand residue %s to new chain %s" % (
1260 l.qualified_name, new_chain.name))
1261 else:
1262 msg = \
1263 "A residue number %s already exists in chain %s" % (
1264 l.number, l.chain.name)
1265 raise RuntimeError(msg)
1266
1267 # Add the residue with its original residue number
1268 new_res = ed.AppendResidue(new_chain, l.name, l.number)
1269 # Add atoms
1270 for old_atom in l.atoms:
1271 ed.InsertAtom(new_res, old_atom.name, old_atom.pos,
1272 element=old_atom.element, occupancy=old_atom.occupancy,
1273 b_factor=old_atom.b_factor, is_hetatm=old_atom.is_hetatom)
1274 # Add bonds
1275 for old_atom in l.atoms:
1276 for old_bond in old_atom.bonds:
1277 new_first = new_res.FindAtom(old_bond.first.name)
1278 new_second = new_res.FindAtom(old_bond.second.name)
1279 ed.Connect(new_first, new_second, old_bond.bond_order)
1280 new_res.SetIsLigand(True)
1281 return new_res
1282
1283 def _cleanup_polymer_ent(self, ent, clib):
1284 """ In principle molck light but logs LigandScorer specific warnings
1285
1286 Only to be applied to polymer entity
1287
1288 1) removes atoms with elements set to H or D (not logged as there is no
1289 effect on scoring)
1290 2) removes residues with no entry in component dictionary
1291 3) removes all residues that are not peptide_liking or
1292 nucleotide_linking according component dictionary
1293 4) removes unknown atoms according to component dictionary
1294 5) reruns processor
1295 """
1296
1297 cleanup_log = {"cleaned_residues": {"no_clib": [],
1298 "not_linking": []},
1299 "cleaned_atoms": {"unknown_atoms": []}}
1300
1301 # 1)
1302 hydrogen_sel = ent.Select("ele == H or ele == D")
1303 if hydrogen_sel.GetAtomCount() > 0:
1304 # just do, no logging as it has no effect on scoring
1305 ent = ost.mol.CreateEntityFromView(ent.Select(
1306 "ele != H and ele != D"), include_exlusive_atoms=False)
1307
1308 # extract residues/atoms for 2), 3) and 4)
1309 not_in_clib = list()
1310 not_linking = list()
1311 unknown_atom = list()
1312
1313 for r in ent.residues:
1314 comp = clib.FindCompound(r.GetName())
1315 if comp is None:
1316 not_in_clib.append(r)
1317 continue
1318 if not (comp.IsPeptideLinking() or comp.IsNucleotideLinking()):
1319 not_linking.append(r)
1320 continue
1321 comp_anames = set([a.name for a in comp.atom_specs])
1322 for a in r.atoms:
1323 if a.name not in comp_anames:
1324 unknown_atom.append(a)
1325
1326 # 2)
1327 if len(not_in_clib) > 0:
1328 cleanup_log["cleaned_residues"]["no_clib"] = \
1329 [_QualifiedResidueNotation(r) for r in not_in_clib]
1330 msg = ("Expected all residues in receptor structure to be defined in "
1331 "compound library but this is not the case. "
1332 "Please refer to the OpenStructure website if an updated "
1333 "library is sufficient to solve the problem: "
1334 "https://openstructure.org/docs/conop/compoundlib/ "
1335 "These residues will not be considered for scoring (which "
1336 "may also affect pre-processing steps such as alignments): ")
1337 msg += ','.join(cleanup_log["cleaned_residues"]["no_clib"])
1338 ost.LogWarning(msg)
1339 for r in not_in_clib:
1340 r.SetIntProp("clib", 1)
1341 ent = ost.mol.CreateEntityFromView(ent.Select("grclib:0!=1"),
1342 include_exlusive_atoms=False)
1343
1344 # 3)
1345 if len(not_linking) > 0:
1346 cleanup_log["cleaned_residues"]["not_linking"] = \
1347 [_QualifiedResidueNotation(r) for r in not_linking]
1348 msg = ("Expected all residues in receptor structure to be peptide "
1349 "linking or nucleotide linking according to the compound "
1350 "library but this is not the case. "
1351 "Please refer to the OpenStructure website if an updated "
1352 "library is sufficient to solve the problem: "
1353 "https://openstructure.org/docs/conop/compoundlib/ "
1354 "These residues will not be considered for scoring (which "
1355 "may also affect pre-processing steps such as alignments): ")
1356 msg += ','.join(cleanup_log["cleaned_residues"]["not_linking"])
1357 ost.LogWarning(msg)
1358 for r in not_linking:
1359 r.SetIntProp("linking", 1)
1360 ent = ost.mol.CreateEntityFromView(ent.Select("grlinking:0!=1"),
1361 include_exlusive_atoms=False)
1362
1363 # 4)
1364 if len(unknown_atom) > 0:
1365 cleanup_log["cleaned_atoms"]["unknown_atoms"] = \
1366 [_QualifiedAtomNotation(a) for a in unknown_atom]
1367 msg = ("Expected atom names according to the compound library but "
1368 "this is not the case."
1369 "Please refer to the OpenStructure website if an updated "
1370 "library is sufficient to solve the problem: "
1371 "https://openstructure.org/docs/conop/compoundlib/ "
1372 "These atoms will not be considered for scoring: ")
1373 msg += ','.join(cleanup_log["cleaned_atoms"]["unknown_atoms"])
1374 ost.LogWarning(msg)
1375 for a in unknown_atom:
1376 a.SetIntProp("unknown", 1)
1377 ent = ost.mol.CreateEntityFromView(ent.Select("gaunknown:0!=1"),
1378 include_exlusive_atoms=False)
1379
1380 # 5)
1381 processor = conop.RuleBasedProcessor(clib)
1382 processor.Process(ent)
1383
1384 return (ent, cleanup_log)
1385
1386
1387def _ResidueToGraph(residue, by_atom_index=False):
1388 """Return a NetworkX graph representation of the residue.
1389
1390 :param residue: the residue from which to derive the graph
1391 :type residue: :class:`ost.mol.ResidueHandle` or
1392 :class:`ost.mol.ResidueView`
1393 :param by_atom_index: Set this parameter to True if you need the nodes to
1394 be labeled by atom index (within the residue).
1395 Otherwise, if False, the nodes will be labeled by
1396 atom names.
1397 :type by_atom_index: :class:`bool`
1398 :rtype: :class:`~networkx.classes.graph.Graph`
1399
1400 Nodes are labeled with the Atom's uppercase
1401 :attr:`~ost.mol.AtomHandle.element`.
1402 """
1403 nxg = networkx.Graph()
1404
1405 for atom in residue.atoms:
1406 nxg.add_node(atom.name, element=atom.element.upper())
1407
1408 # This will list all edges twice - once for every atom of the pair.
1409 # But as of NetworkX 3.0 adding the same edge twice has no effect,
1410 # so we're good.
1411 nxg.add_edges_from([(
1412 b.first.name,
1413 b.second.name) for a in residue.atoms for b in a.GetBondList()])
1414
1415 if by_atom_index:
1416 nxg = networkx.relabel_nodes(nxg,
1417 {a: b for a, b in zip(
1418 [a.name for a in residue.atoms],
1419 range(len(residue.atoms)))},
1420 True)
1421 return nxg
1422
1423def ComputeSymmetries(model_ligand, target_ligand, substructure_match=False,
1424 by_atom_index=False, return_symmetries=True,
1425 max_symmetries=1e6, model_graph = None,
1426 target_graph = None):
1427 """Return a list of symmetries (isomorphisms) of the model onto the target
1428 residues.
1429
1430 :param model_ligand: The model ligand
1431 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1432 :class:`ost.mol.ResidueView`
1433 :param target_ligand: The target ligand
1434 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1435 :class:`ost.mol.ResidueView`
1436 :param substructure_match: Set this to True to allow partial ligands
1437 in the reference.
1438 :type substructure_match: :class:`bool`
1439 :param by_atom_index: Set this parameter to True if you need the symmetries
1440 to refer to atom index (within the residue).
1441 Otherwise, if False, the symmetries refer to atom
1442 names.
1443 :type by_atom_index: :class:`bool`
1444 :type return_symmetries: If Truthy, return the mappings, otherwise simply
1445 return True if a mapping is found (and raise if
1446 no mapping is found). This is useful to quickly
1447 find out if a mapping exist without the expensive
1448 step to find all the mappings.
1449 :type return_symmetries: :class:`bool`
1450 :param max_symmetries: If more than that many isomorphisms exist, raise
1451 a :class:`TooManySymmetriesError`. This can only be assessed by
1452 generating at least that many isomorphisms and can take some time.
1453 :type max_symmetries: :class:`int`
1454 :raises: :class:`NoSymmetryError` when no symmetry can be found;
1455 :class:`NoIsomorphicSymmetryError` in case of isomorphic
1456 subgraph but *substructure_match* is False;
1457 :class:`TooManySymmetriesError` when more than `max_symmetries`
1458 isomorphisms are found; :class:`DisconnectedGraphError` if
1459 graph for *model_ligand*/*target_ligand* is disconnected.
1460 """
1461
1462 # Get the Graphs of the ligands
1463 if model_graph is None:
1464 model_graph = _ResidueToGraph(model_ligand,
1465 by_atom_index=by_atom_index)
1466
1467 if not networkx.is_connected(model_graph):
1468 msg = "Disconnected graph for model ligand %s" % model_ligand
1469 raise DisconnectedGraphError(msg)
1470
1471
1472 if target_graph is None:
1473 target_graph = _ResidueToGraph(target_ligand,
1474 by_atom_index=by_atom_index)
1475
1476 if not networkx.is_connected(target_graph):
1477 msg = "Disconnected graph for target ligand %s" % target_ligand
1478 raise DisconnectedGraphError(msg)
1479
1480 # Note the argument order (model, target) which differs from spyrmsd.
1481 # This is because a subgraph of model is isomorphic to target - but not the
1482 # opposite as we only consider partial ligands in the reference.
1483 # Make sure to generate the symmetries correctly in the end
1484 gm = networkx.algorithms.isomorphism.GraphMatcher(
1485 model_graph, target_graph, node_match=lambda x, y:
1486 x["element"] == y["element"])
1487 if gm.is_isomorphic():
1488 if not return_symmetries:
1489 return True
1490 symmetries = []
1491 for i, isomorphism in enumerate(gm.isomorphisms_iter()):
1492 if i >= max_symmetries:
1494 "Too many symmetries between %s and %s" % (
1495 str(model_ligand), str(target_ligand)))
1496 symmetries.append((list(isomorphism.values()),
1497 list(isomorphism.keys())))
1498 assert len(symmetries) > 0
1499 LogVerbose("Found %s isomorphic mappings (symmetries)" % len(symmetries))
1500 elif gm.subgraph_is_isomorphic() and substructure_match:
1501 if not return_symmetries:
1502 return True
1503 symmetries = []
1504 for i, isomorphism in enumerate(gm.subgraph_isomorphisms_iter()):
1505 if i >= max_symmetries:
1507 "Too many symmetries between %s and %s" % (
1508 str(model_ligand), str(target_ligand)))
1509 symmetries.append((list(isomorphism.values()),
1510 list(isomorphism.keys())))
1511 assert len(symmetries) > 0
1512 # Assert that all the atoms in the target are part of the substructure
1513 assert len(symmetries[0][0]) == len(target_ligand.atoms)
1514 n_sym = len(symmetries)
1515 LogVerbose("Found %s subgraph isomorphisms (symmetries)" % n_sym)
1516 elif gm.subgraph_is_isomorphic():
1517 LogVerbose("Found subgraph isomorphisms (symmetries), but"
1518 " ignoring because substructure_match=False")
1519 raise NoIsomorphicSymmetryError("No symmetry between %s and %s" % (
1520 str(model_ligand), str(target_ligand)))
1521 else:
1522 LogVerbose("Found no isomorphic mappings (symmetries)")
1523 raise NoSymmetryError("No symmetry between %s and %s" % (
1524 str(model_ligand), str(target_ligand)))
1525
1526 return symmetries
1527
1528class NoSymmetryError(ValueError):
1529 """ Exception raised when no symmetry can be found.
1530 """
1531 pass
1532
1533class NoIsomorphicSymmetryError(ValueError):
1534 """ Exception raised when no isomorphic symmetry can be found
1535
1536 There would be isomorphic subgraphs for which symmetries can
1537 be found, but substructure_match is disabled
1538 """
1539 pass
1540
1541class TooManySymmetriesError(ValueError):
1542 """ Exception raised when too many symmetries are found.
1543 """
1544 pass
1545
1546class DisconnectedGraphError(Exception):
1547 """ Exception raised when the ligand graph is disconnected.
1548 """
1549 pass
1550
1551# specify public interface
1552__all__ = ('LigandScorer', 'ComputeSymmetries', 'NoSymmetryError',
1553 'NoIsomorphicSymmetryError', 'TooManySymmetriesError',
1554 'DisconnectedGraphError')
Protein or molecule.
definition of EntityView
_copy_ligand(self, l, ent, ed, rename_ligand_chain)
__init__(self, model, target, model_ligands, target_ligands, resnum_alignments=False, substructure_match=False, coverage_delta=0.2, max_symmetries=1e5, rename_ligand_chain=False, min_pep_length=6, min_nuc_length=4, pep_seqid_thr=95., nuc_seqid_thr=95., mdl_map_pep_seqid_thr=0., mdl_map_nuc_seqid_thr=0., seqres=None, trg_seqres_mapping=None)
_compute(self, symmetries, target_ligand, model_ligand)
_ResidueToGraph(residue, by_atom_index=False)
ComputeSymmetries(model_ligand, target_ligand, substructure_match=False, by_atom_index=False, return_symmetries=True, max_symmetries=1e6, model_graph=None, target_graph=None)
EntityHandle DLLEXPORT_OST_MOL CreateEntityFromView(const EntityView &view, bool include_exlusive_atoms, EntityHandle handle=EntityHandle())
create new entity handle from entity view