OpenStructure
ligand_scoring_base.py
Go to the documentation of this file.
1 from contextlib import contextmanager
2 import numpy as np
3 import networkx
4 
5 import ost
6 from ost import mol
7 from ost import conop
8 from ost import LogWarning, LogScript, LogInfo, LogVerbose, LogDebug, GetVerbosityLevel, PushVerbosityLevel, PopVerbosityLevel
9 from ost.mol.alg import chain_mapping
10 
11 @contextmanager
12 def _SinkVerbosityLevel(n=1):
13  """ Context manager to temporarily reduce the verbosity level by n.
14 
15  Example usage:
16  with _SinkVerbosityLevel(2):
17  LogVerbose("Test")
18  Will only write "Test" in script level (2 above)
19  """
20  PushVerbosityLevel(GetVerbosityLevel() - n)
21  try:
22  yield
23  except:
24  raise
25  finally:
27 
28 
29 def _QualifiedAtomNotation(a):
30  """Return a parsable string of the atom in the format:
31  ChainName.ResidueNumber.InsertionCode.AtomName."""
32  resnum = a.residue.number
33  return "{cname}.{rnum}.{ins_code}.{aname}".format(
34  cname=a.chain.name,
35  rnum=resnum.num,
36  ins_code=resnum.ins_code.strip("\u0000"),
37  aname=a.name,
38  )
39 
40 
41 def _QualifiedResidueNotation(r):
42  """Return a parsable string of the residue in the format:
43  ChainName.ResidueNumber.InsertionCode."""
44  resnum = r.number
45  return "{cname}.{rnum}.{ins_code}".format(
46  cname=r.chain.name,
47  rnum=resnum.num,
48  ins_code=resnum.ins_code.strip("\u0000"),
49  )
50 
52  """ Scorer to compute various small molecule ligand (non polymer) scores.
53 
54  :class:`LigandScorer` is an abstract base class dealing with all the setup,
55  data storage, enumerating ligand symmetries and target/model ligand
56  matching/assignment. But actual score computation is delegated to child
57  classes.
58 
59  At the moment, two such classes are available:
60 
61  * :class:`ost.mol.alg.ligand_scoring_lddtpli.LDDTPLIScorer`
62  that assesses the conservation of protein-ligand
63  contacts (LDDT-PLI);
64  * :class:`ost.mol.alg.ligand_scoring_scrmsd.SCRMSDScorer`
65  that computes a binding-site superposed, symmetry-corrected RMSD
66  (BiSyRMSD) and ligand pocket LDDT (LDDT-LP).
67 
68  All versus all scores are available through the lazily computed
69  :attr:`score_matrix`. However, many things can go wrong... be it even
70  something as simple as two ligands not matching. Error states therefore
71  encode scoring issues. An Issue for a particular ligand is indicated by a
72  non-zero state in :attr:`model_ligand_states`/:attr:`target_ligand_states`.
73  This invalidates pairwise scores of such a ligand with all other ligands.
74  This and other issues in pairwise score computation are reported in
75  :attr:`state_matrix` which has the same size as :attr:`score_matrix`.
76  Only if the respective location is 0, a valid pairwise score can be
77  expected. The states and their meaning can be explored with code::
78 
79  for state_code, (short_desc, desc) in scorer_obj.state_decoding.items():
80  print(state_code)
81  print(short_desc)
82  print(desc)
83 
84  A common use case is to derive a one-to-one mapping between ligands in
85  the model and the target for which :class:`LigandScorer` provides an
86  automated :attr:`assignment` procedure.
87  By default, only exact matches between target and model ligands are
88  considered. This is a problem when the target only contains a subset
89  of the expected atoms (for instance if atoms are missing in an
90  experimental structure, which often happens in the PDB). With
91  `substructure_match=True`, complete model ligands can be scored against
92  partial target ligands. One problem with this approach is that it is
93  very easy to find good matches to small, irrelevant ligands like EDO, CO2
94  or GOL. The assignment algorithm therefore considers the coverage,
95  expressed as the fraction of atoms of the model ligand atoms covered in the
96  target. Higher coverage matches are prioritized, but a match with a better
97  score will be preferred if it falls within a window of `coverage_delta`
98  (by default 0.2) of a worse-scoring match. As a result, for instance,
99  with a delta of 0.2, a low-score match with coverage 0.96 would be
100  preferred over a high-score match with coverage 0.70.
101 
102  Assumptions:
103 
104  Unlike most of OpenStructure, this class does not assume that the ligands
105  (either for the model or the target) are part of the PDB component
106  dictionary. They may have arbitrary residue names. Residue names do not
107  have to match between the model and the target. Matching is based on
108  the calculation of isomorphisms which depend on the atom element name and
109  atom connectivity (bond order is ignored).
110  It is up to the caller to ensure that the connectivity of atoms is properly
111  set before passing any ligands to this class. Ligands with improper
112  connectivity will lead to bogus results.
113 
114  This only applies to the ligand. The rest of the model and target
115  structures (protein, nucleic acids) must still follow the usual rules and
116  contain only residues from the compound library. Structures are cleaned up
117  according to constructor documentation. We advise to
118  use the :func:`ost.mol.alg.scoring_base.MMCIFPrep` and
119  :func:`ost.mol.alg.scoring_base.PDBPrep` for loading which already
120  clean hydrogens and, in the case of MMCIF, optionally extract ligands ready
121  to be used by the :class:`LigandScorer` based on "non-polymer" entity types.
122  In case of PDB file format, ligands must be loaded separately as SDF files.
123 
124  Only polymers (protein and nucleic acids) of model and target are considered
125  for ligand binding sites. The
126  :class:`ost.mol.alg.chain_mapping.ChainMapper` is used to enumerate possible
127  mappings of these chains. In short: identical chains in the target are
128  grouped based on pairwise sequence identity
129  (see pep_seqid_thr/nuc_seqid_thr param). Each model chain is assigned to
130  one of these groups (see mdl_map_pep_seqid_thr/mdl_map_nuc_seqid_thr param).
131  To avoid spurious matches, only polymers of a certain length are considered
132  in this matching procedure (see min_pep_length/min_nuc_length param).
133  Shorter polymers are never mapped and do not contribute to scoring.
134 
135  Here is an example of how to setup a scorer::
136 
137  from ost.mol.alg.ligand_scoring_scrmsd import SCRMSDScorer
138  from ost.mol.alg.scoring_base import MMCIFPrep
139  from ost.mol.alg.scoring_base import PDBPrep
140 
141  # Load data
142  # Structure model in PDB format, containing the receptor only
143  model = PDBPrep("path_to_model.pdb")
144  # Ligand model as SDF file
145  model_ligand = io.LoadEntity("path_to_ligand.sdf", format="sdf")
146  # Target loaded from mmCIF, containing the ligand
147  target, target_ligands = MMCIFPrep("path_to_target.cif",
148  extract_nonpoly=True)
149 
150  # Setup scorer object and compute SCRMSD
151  model_ligands = [model_ligand.Select("ele != H")]
152  sc = SCRMSDScorer(model, target, model_ligands, target_ligands)
153 
154  # Perform assignment and read respective scores
155  for lig_pair in sc.assignment:
156  trg_lig = sc.target_ligands[lig_pair[0]]
157  mdl_lig = sc.model_ligands[lig_pair[1]]
158  score = sc.score_matrix[lig_pair[0], lig_pair[1]]
159  print(f"Score for {trg_lig} and {mdl_lig}: {score}")
160 
161  # check cleanup in model and target structure:
162  print("model cleanup:", sc.model_cleanup_log)
163  print("target cleanup:", sc.target_cleanup_log)
164 
165 
166  :param model: Model structure - a deep copy is available as :attr:`model`.
167  The model undergoes the following cleanup steps which are
168  dependent on :class:`ost.conop.CompoundLib` returned by
169  :func:`ost.conop.GetDefaultLib`: 1) removal
170  of hydrogens, 2) removal of residues for which there is no
171  entry in :class:`ost.conop.CompoundLib`, 3) removal of
172  residues that are not peptide linking or nucleotide linking
173  according to :class:`ost.conop.CompoundLib` 4) removal of
174  atoms that are not defined for respective residues in
175  :class:`ost.conop.CompoundLib`. Except step 1), every cleanup
176  is logged with :class:`ost.LogLevel` Warning and a report is
177  available as :attr:`model_cleanup_log`.
178  :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
179  :param target: Target structure - same processing as *model*.
180  :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
181  :param model_ligands: Model ligands, as a list of
182  :class:`ost.mol.ResidueHandle`/
183  :class:`ost.mol.ResidueView`/
184  :class:`ost.mol.EntityHandle`/
185  :class:`ost.mol.EntityView`. For
186  :class:`ost.mol.EntityHandle`/
187  :class:`ost.mol.EntityView`, each residue is
188  considered to be an individual ligand.
189  All ligands are copied into a separate
190  :class:`ost.mol.EntityHandle` available as
191  :attr:`model_ligand_ent` and the respective
192  list of ligands is available as :attr:`model_ligands`.
193  :type model_ligands: :class:`list`
194  :param target_ligands: Target ligands, same processing as model ligands.
195  :type target_ligands: :class:`list`
196  :param resnum_alignments: Whether alignments between chemically equivalent
197  chains in *model* and *target* can be computed
198  based on residue numbers. This can be assumed in
199  benchmarking setups such as CAMEO/CASP.
200  :type resnum_alignments: :class:`bool`
201  :param substructure_match: Set this to True to allow incomplete (i.e.
202  partially resolved) target ligands.
203  :type substructure_match: :class:`bool`
204  :param coverage_delta: the coverage delta for partial ligand assignment.
205  :type coverage_delta: :class:`float`
206  :param max_symmetries: If more than that many isomorphisms exist for
207  a target-ligand pair, it will be ignored and reported
208  as unassigned.
209  :type max_symmetries: :class:`int`
210  :param min_pep_length: Relevant parameter if short peptides are involved in
211  the polymer binding site. Minimum peptide length for
212  a chain to be considered in chain mapping.
213  The chain mapping algorithm first performs an all vs.
214  all pairwise sequence alignment to identify \"equal\"
215  chains within the target structure. We go for simple
216  sequence identity there. Short sequences can be
217  problematic as they may produce high sequence identity
218  alignments by pure chance.
219  :type min_pep_length: :class:`int`
220  :param min_nuc_length: Same for nucleotides
221  :type min_nuc_length: :class:`int`
222  :param pep_seqid_thr: Parameter that affects identification of identical
223  chains in target - see
224  :class:`ost.mol.alg.chain_mapping.ChainMapper`
225  :type pep_seqid_thr: :class:`float`
226  :param nuc_seqid_thr: Parameter that affects identification of identical
227  chains in target - see
228  :class:`ost.mol.alg.chain_mapping.ChainMapper`
229  :type nuc_seqid_thr: :class:`float`
230  :param mdl_map_pep_seqid_thr: Parameter that affects mapping of model chains
231  to target chains - see
232  :class:`ost.mol.alg.chain_mapping.ChainMapper`
233  :type mdl_map_pep_seqid_thr: :class:`float`
234  :param mdl_map_nuc_seqid_thr: Parameter that affects mapping of model chains
235  to target chains - see
236  :class:`ost.mol.alg.chain_mapping.ChainMapper`
237  :type mdl_map_nuc_seqid_thr: :class:`float`
238  :param seqres: Parameter that affects identification of identical chains in
239  target - see :class:`ost.mol.alg.chain_mapping.ChainMapper`
240  :type seqres: :class:`ost.seq.SequenceList`
241  :param trg_seqres_mapping: Parameter that affects identification of identical
242  chains in target - see
243  :class:`ost.mol.alg.chain_mapping.ChainMapper`
244  :type trg_seqres_mapping: :class:`dict`
245  """
246 
247  def __init__(self, model, target, model_ligands, target_ligands,
248  resnum_alignments=False, substructure_match=False,
249  coverage_delta=0.2, max_symmetries=1e5,
250  rename_ligand_chain=False, min_pep_length = 6,
251  min_nuc_length = 4, pep_seqid_thr = 95.,
252  nuc_seqid_thr = 95.,
253  mdl_map_pep_seqid_thr = 0.,
254  mdl_map_nuc_seqid_thr = 0.,
255  seqres = None,
256  trg_seqres_mapping = None):
257 
258  if isinstance(model, mol.EntityView):
259  self._model_model = mol.CreateEntityFromView(model, False)
260  elif isinstance(model, mol.EntityHandle):
261  self._model_model = model.Copy()
262  else:
263  raise RuntimeError("model must be of type EntityView/EntityHandle")
264 
265  if isinstance(target, mol.EntityView):
266  self._target_target = mol.CreateEntityFromView(target, False)
267  elif isinstance(target, mol.EntityHandle):
268  self._target_target = target.Copy()
269  else:
270  raise RuntimeError("target must be of type EntityView/EntityHandle")
271 
272  clib = conop.GetDefaultLib()
273  if not clib:
274  ost.LogError("A compound library is required. "
275  "Please refer to the OpenStructure website: "
276  "https://openstructure.org/docs/conop/compoundlib/.")
277  raise RuntimeError("No compound library found")
278  self._target_target, self._target_cleanup_log_target_cleanup_log = \
279  self._cleanup_polymer_ent_cleanup_polymer_ent(self._target_target, clib)
280  self._model_model, self._model_cleanup_log_model_cleanup_log = \
281  self._cleanup_polymer_ent_cleanup_polymer_ent(self._model_model, clib)
282 
283  # keep ligands separate from polymer entities
284  self._target_ligand_ent_target_ligand_ent = mol.CreateEntity()
285  self._model_ligand_ent_model_ligand_ent = mol.CreateEntity()
286  target_ligand_ent_ed = self._target_ligand_ent_target_ligand_ent.EditXCS(mol.BUFFERED_EDIT)
287  model_ligand_ent_ed = self._model_ligand_ent_model_ligand_ent.EditXCS(mol.BUFFERED_EDIT)
288 
289  self._target_ligands_target_ligands = list()
290  for l in target_ligands:
291  if isinstance(l, mol.EntityView) or isinstance(l, mol.EntityHandle):
292  for r in l.residues:
293  self._target_ligands_target_ligands.append(self._copy_ligand_copy_ligand(r, self._target_ligand_ent_target_ligand_ent,
294  target_ligand_ent_ed,
295  rename_ligand_chain))
296  elif isinstance(l, mol.ResidueHandle) or isinstance(l, mol.ResidueView):
297  self._target_ligands_target_ligands.append(self._copy_ligand_copy_ligand(l, self._target_ligand_ent_target_ligand_ent,
298  target_ligand_ent_ed,
299  rename_ligand_chain))
300  else:
301  raise RuntimeError("ligands must be of type EntityView/"
302  "EntityHandle/ResidueView/ResidueHandle")
303 
304  self._model_ligands_model_ligands = list()
305  for l in model_ligands:
306  if isinstance(l, mol.EntityView) or isinstance(l, mol.EntityHandle):
307  for r in l.residues:
308  self._model_ligands_model_ligands.append(self._copy_ligand_copy_ligand(r, self._model_ligand_ent_model_ligand_ent,
309  model_ligand_ent_ed,
310  rename_ligand_chain))
311  elif isinstance(l, mol.ResidueHandle) or isinstance(l, mol.ResidueView):
312  self._model_ligands_model_ligands.append(self._copy_ligand_copy_ligand(l, self._model_ligand_ent_model_ligand_ent,
313  model_ligand_ent_ed,
314  rename_ligand_chain))
315  else:
316  raise RuntimeError("ligands must be of type EntityView/"
317  "EntityHandle/ResidueView/ResidueHandle")
318 
319 
320  if len(self.model_ligandsmodel_ligands) == 0:
321  LogWarning("No ligands in the model")
322  if len(self.target_ligandstarget_ligands) == 0:
323  raise ValueError("No ligand in the model and in the target")
324 
325  self._resnum_alignments_resnum_alignments = resnum_alignments
326  self._substructure_match_substructure_match = substructure_match
327  self._coverage_delta_coverage_delta = coverage_delta
328  self._max_symmetries_max_symmetries = max_symmetries
329  self._min_pep_length_min_pep_length = min_pep_length
330  self._min_nuc_length_min_nuc_length = min_nuc_length
331  self._pep_seqid_thr_pep_seqid_thr = pep_seqid_thr
332  self._nuc_seqid_thr_nuc_seqid_thr = nuc_seqid_thr
333  self._mdl_map_pep_seqid_thr_mdl_map_pep_seqid_thr = mdl_map_pep_seqid_thr
334  self._mdl_map_nuc_seqid_thr_mdl_map_nuc_seqid_thr = mdl_map_nuc_seqid_thr
335  self._seqres_seqres = seqres
336  self._trg_seqres_mapping_trg_seqres_mapping = trg_seqres_mapping
337 
338  # lazily computed attributes
339  self.__chain_mapper__chain_mapper = None
340  self.__chem_mapping__chem_mapping = None
341  self.__chem_group_alns__chem_group_alns = None
342  self.__ref_mdl_alns__ref_mdl_alns = None
343  self.__mdl_chains_without_chem_mapping__mdl_chains_without_chem_mapping = None
344  self.__chain_mapping_mdl__chain_mapping_mdl = None
345 
346  # keep track of states
347  # simple integers instead of enums - encoding available in
348  # self.state_decoding
349  self._state_matrix_state_matrix = None
350  self._model_ligand_states_model_ligand_states = None
351  self._target_ligand_states_target_ligand_states = None
352 
353  # score matrices
354  self._score_matrix_score_matrix = None
355  self._coverage_matrix_coverage_matrix = None
356  self._aux_matrix_aux_matrix = None
357 
358  # assignment and derived data
359  self._assignment_assignment = None
360  self._score_dict_score_dict = None
361  self._aux_dict_aux_dict = None
362 
363  # human readable description of states - child class must extend with
364  # with child class specific states
365  # each state code comes with a tuple of two elements:
366  # 1) short description 2) human readable description
367  # The actual states are set in _compute_scores in :class:`LigandScorer`
368  # or _compute_score of the child class.
369  if self.substructure_matchsubstructure_match:
370  iso = "subgraph isomorphism"
371  else:
372  iso = "full graph isomorphism"
373 
374  self.state_decodingstate_decoding = \
375  {0: ("OK", "OK"),
376  1: ("identity", f"Ligands could not be matched (by {iso})"),
377  2: ("symmetries", "Too many symmetries between ligand atoms were "
378  "found - increasing max_symmetries might help"),
379  3: ("no_iso", "No full isomorphic match could be found - enabling "
380  "substructure_match might allow a match"),
381  4: ("disconnected", "Ligand graph is disconnected"),
382  5: ("stoichiometry", "Ligand was already assigned to another ligand "
383  "(different stoichiometry)"),
384  6: ("single_ligand_issue", "Cannot compute valid pairwise score as "
385  "either model or target ligand have non-zero state."),
386  9: ("unknown", "An unknown error occured in LigandScorer")}
387 
388 
389  @property
390  def model(self):
391  """ Model receptor structure
392 
393  Processed according to docs in :class:`LigandScorer` constructor
394  """
395  return self._model_model
396 
397  @property
398  def target(self):
399  """ Target receptor structure
400 
401  Processed according to docs in :class:`LigandScorer` constructor
402  """
403  return self._target_target
404 
405  @property
406  def model_cleanup_log(self):
407  """ Reports residues/atoms that were removed in model during cleanup
408 
409  Residues and atoms are described as :class:`str` in format
410  <chain_name>.<resnum>.<ins_code> (residue) and
411  <chain_name>.<resnum>.<ins_code>.<aname> (atom).
412 
413  :class:`dict` with keys:
414 
415  * 'cleaned_residues': another :class:`dict` with keys:
416 
417  * 'no_clib': residues that have been removed because no entry could be
418  found in :class:`ost.conop.CompoundLib`
419  * 'not_linking': residues that have been removed because they're not
420  peptide or nucleotide linking according to
421  :class:`ost.conop.CompoundLib`
422 
423  * 'cleaned_atoms': another :class:`dict` with keys:
424 
425  * 'unknown_atoms': atoms that have been removed as they're not part
426  of their respective residue according to
427  :class:`ost.conop.CompoundLib`
428  """
429  return self._model_cleanup_log_model_cleanup_log
430 
431  @property
433  """ Same for target
434  """
435  return self._target_cleanup_log_target_cleanup_log
436 
437  @property
438  def model_ligands(self):
439  """ Residues representing model ligands
440 
441  :class:`list` of :class:`ost.mol.ResidueHandle`
442  """
443  return self._model_ligands_model_ligands
444 
445  @property
446  def target_ligands(self):
447  """ Residues representing target ligands
448 
449  :class:`list` of :class:`ost.mol.ResidueHandle`
450  """
451  return self._target_ligands_target_ligands
452 
453  @property
454  def resnum_alignments(self):
455  """ Given at :class:`LigandScorer` construction
456  """
457  return self._resnum_alignments_resnum_alignments
458 
459  @property
460  def min_pep_length(self):
461  """ Given at :class:`LigandScorer` construction
462  """
463  return self._min_pep_length_min_pep_length
464 
465  @property
466  def min_nuc_length(self):
467  """ Given at :class:`LigandScorer` construction
468  """
469  return self._min_nuc_length_min_nuc_length
470 
471  @property
472  def pep_seqid_thr(self):
473  """ Given at :class:`LigandScorer` construction
474  """
475  return self._pep_seqid_thr_pep_seqid_thr
476 
477  @property
478  def nuc_seqid_thr(self):
479  """ Given at :class:`LigandScorer` construction
480  """
481  return self._nuc_seqid_thr_nuc_seqid_thr
482 
483  @property
485  """ Given at :class:`LigandScorer` construction
486  """
487  return self._mdl_map_pep_seqid_thr_mdl_map_pep_seqid_thr
488 
489  @property
491  """ Given at :class:`LigandScorer` construction
492  """
493  return self._mdl_map_nuc_seqid_thr_mdl_map_nuc_seqid_thr
494 
495  @property
496  def seqres(self):
497  """ Given at :class:`LigandScorer` construction
498  """
499  return self._seqres_seqres
500 
501  @property
503  """ Given at :class:`LigandScorer` construction
504  """
505  return self._trg_seqres_mapping_trg_seqres_mapping
506 
507  @property
509  """ Given at :class:`LigandScorer` construction
510  """
511  return self._substructure_match_substructure_match
512 
513  @property
514  def coverage_delta(self):
515  """ Given at :class:`LigandScorer` construction
516  """
517  return self._coverage_delta_coverage_delta
518 
519  @property
520  def max_symmetries(self):
521  """ Given at :class:`LigandScorer` construction
522  """
523  return self._max_symmetries_max_symmetries
524 
525  @property
526  def state_matrix(self):
527  """ Encodes states of ligand pairs
528 
529  Ligand pairs can be matched and a valid score can be expected if
530  respective location in this matrix is 0.
531  Target ligands are in rows, model ligands in columns. States are encoded
532  as integers <= 9. Larger numbers encode errors for child classes.
533  Use something like ``self.state_decoding[3]`` to get a decscription.
534 
535  :rtype: :class:`~numpy.ndarray`
536  """
537  if self._state_matrix_state_matrix is None:
538  self._compute_scores_compute_scores()
539  return self._state_matrix_state_matrix
540 
541  @property
543  """ Encodes states of model ligands
544 
545  Non-zero state in any of the model ligands invalidates the full
546  respective column in :attr:`~state_matrix`.
547 
548  :rtype: :class:`~numpy.ndarray`
549  """
550  if self._model_ligand_states_model_ligand_states is None:
551  self._compute_scores_compute_scores()
552  return self._model_ligand_states_model_ligand_states
553 
554  @property
556  """ Encodes states of target ligands
557 
558  Non-zero state in any of the target ligands invalidates the full
559  respective row in :attr:`~state_matrix`.
560 
561  :rtype: :class:`~numpy.ndarray`
562  """
563  if self._target_ligand_states_target_ligand_states is None:
564  self._compute_scores_compute_scores()
565  return self._target_ligand_states_target_ligand_states
566 
567  @property
568  def score_matrix(self):
569  """ Get the matrix of scores.
570 
571  Target ligands are in rows, model ligands in columns.
572 
573  NaN values indicate that no value could be computed (i.e. different
574  ligands). In other words: values are only valid if the respective
575  location in :attr:`~state_matrix` is 0.
576 
577  :rtype: :class:`~numpy.ndarray`
578  """
579  if self._score_matrix_score_matrix is None:
580  self._compute_scores_compute_scores()
581  return self._score_matrix_score_matrix
582 
583  @property
584  def coverage_matrix(self):
585  """ Get the matrix of model ligand atom coverage in the target.
586 
587  Target ligands are in rows, model ligands in columns.
588 
589  NaN values indicate that no value could be computed (i.e. different
590  ligands). In other words: values are only valid if the respective
591  location in :attr:`~state_matrix` is 0. If `substructure_match=False`,
592  only full match isomorphisms are considered, and therefore only values
593  of 1.0 can be observed.
594 
595  :rtype: :class:`~numpy.ndarray`
596  """
597  if self._coverage_matrix_coverage_matrix is None:
598  self._compute_scores_compute_scores()
599  return self._coverage_matrix_coverage_matrix
600 
601  @property
602  def aux_matrix(self):
603  """ Get the matrix of scorer specific auxiliary data.
604 
605  Target ligands are in rows, model ligands in columns.
606 
607  Auxiliary data consists of arbitrary data dicts which allow a child
608  class to provide additional information for a scored ligand pair.
609  empty dictionaries indicate that the child class simply didn't return
610  anything or that no value could be computed (e.g. different ligands).
611  In other words: values are only valid if respective location in the
612  :attr:`~state_matrix` is 0.
613 
614  :rtype: :class:`~numpy.ndarray`
615  """
616  if self._aux_matrix_aux_matrix is None:
617  self._compute_scores_compute_scores()
618  return self._aux_matrix_aux_matrix
619 
620  @property
621  def assignment(self):
622  """ Ligand assignment based on computed scores
623 
624  Implements a greedy algorithm to assign target and model ligands
625  with each other. Starts from each valid ligand pair as indicated
626  by a state of 0 in :attr:`state_matrix`. Each iteration first selects
627  high coverage pairs. Given max_coverage defined as the highest
628  coverage observed in the available pairs, all pairs with coverage
629  in [max_coverage-*coverage_delta*, max_coverage] are selected.
630  The best scoring pair among those is added to the assignment
631  and the whole process is repeated until there are no ligands to
632  assign anymore.
633 
634  :rtype: :class:`list` of :class:`tuple` (trg_lig_idx, mdl_lig_idx)
635  """
636  if self._assignment_assignment is None:
637  self._assignment_assignment = list()
638  # Build working array that contains tuples for all mdl/trg ligand
639  # pairs with valid score as indicated by a state of 0:
640  # (score, coverage, trg_ligand_idx, mdl_ligand_idx)
641  tmp = list()
642  for trg_idx in range(self.score_matrixscore_matrix.shape[0]):
643  for mdl_idx in range(self.score_matrixscore_matrix.shape[1]):
644  if self.state_matrixstate_matrix[trg_idx, mdl_idx] == 0:
645  tmp.append((self.score_matrixscore_matrix[trg_idx, mdl_idx],
646  self.coverage_matrixcoverage_matrix[trg_idx, mdl_idx],
647  trg_idx, mdl_idx))
648 
649  # sort by score, such that best scoring item is in front
650  if self._score_dir_score_dir() == '+':
651  tmp.sort(reverse=True)
652  elif self._score_dir_score_dir() == '-':
653  tmp.sort()
654  else:
655  raise RuntimeError("LigandScorer._score_dir must return one in "
656  "['+', '-']")
657 
658  LogScript("Computing ligand assignment")
659  while len(tmp) > 0:
660  # select high coverage ligand pairs in working array
661  coverage_thresh = max([x[1] for x in tmp]) - self.coverage_deltacoverage_delta
662  top_coverage = [x for x in tmp if x[1] >= coverage_thresh]
663 
664  # working array is sorted by score => just pick first one
665  a = top_coverage[0][2] # selected trg_ligand_idx
666  b = top_coverage[0][3] # selected mdl_ligand_idx
667  self._assignment_assignment.append((a, b))
668 
669  # kick out remaining pairs involving these ligands
670  tmp = [x for x in tmp if (x[2] != a and x[3] != b)]
671 
672  return self._assignment_assignment
673 
674  @property
675  def score(self):
676  """ Get a dictionary of score values, keyed by model ligand
677 
678  Extract score with something like:
679  ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
680  The returned scores are based on :attr:`~assignment`.
681 
682  :rtype: :class:`dict`
683  """
684  if self._score_dict_score_dict is None:
685  self._score_dict_score_dict = dict()
686  for (trg_lig_idx, mdl_lig_idx) in self.assignmentassignment:
687  mdl_lig = self.model_ligandsmodel_ligands[mdl_lig_idx]
688  cname = mdl_lig.GetChain().GetName()
689  rnum = mdl_lig.GetNumber()
690  if cname not in self._score_dict_score_dict:
691  self._score_dict_score_dict[cname] = dict()
692  score = self.score_matrixscore_matrix[trg_lig_idx, mdl_lig_idx]
693  self._score_dict_score_dict[cname][rnum] = score
694  return self._score_dict_score_dict
695 
696  @property
697  def aux(self):
698  """ Get a dictionary of score details, keyed by model ligand
699 
700  Extract dict with something like:
701  ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
702  The returned info dicts are based on :attr:`~assignment`. The content is
703  documented in the respective child class.
704 
705  :rtype: :class:`dict`
706  """
707  if self._aux_dict_aux_dict is None:
708  self._aux_dict_aux_dict = dict()
709  for (trg_lig_idx, mdl_lig_idx) in self.assignmentassignment:
710  mdl_lig = self.model_ligandsmodel_ligands[mdl_lig_idx]
711  cname = mdl_lig.GetChain().GetName()
712  rnum = mdl_lig.GetNumber()
713  if cname not in self._aux_dict_aux_dict:
714  self._aux_dict_aux_dict[cname] = dict()
715  d = self.aux_matrixaux_matrix[trg_lig_idx, mdl_lig_idx]
716  self._aux_dict_aux_dict[cname][rnum] = d
717  return self._aux_dict_aux_dict
718 
719 
720  @property
722  """ Get indices of target ligands which are not assigned
723 
724  :rtype: :class:`list` of :class:`int`
725  """
726  # compute on-the-fly, no need for caching
727  assigned = set([x[0] for x in self.assignmentassignment])
728  return [x for x in range(len(self.target_ligandstarget_ligands)) if x not in assigned]
729 
730  @property
732  """ Get indices of model ligands which are not assigned
733 
734  :rtype: :class:`list` of :class:`int`
735  """
736  # compute on-the-fly, no need for caching
737  assigned = set([x[1] for x in self.assignmentassignment])
738  return [x for x in range(len(self.model_ligandsmodel_ligands)) if x not in assigned]
739 
740  def get_target_ligand_state_report(self, trg_lig_idx):
741  """ Get summary of states observed with respect to all model ligands
742 
743  Mainly for debug purposes
744 
745  :param trg_lig_idx: Index of target ligand for which report should be
746  generated
747  :type trg_lig_idx: :class:`int`
748  """
749  return self._get_report_get_report(self.target_ligand_statestarget_ligand_states[trg_lig_idx],
750  self.state_matrixstate_matrix[trg_lig_idx,:])
751 
752  def get_model_ligand_state_report(self, mdl_lig_idx):
753  """ Get summary of states observed with respect to all target ligands
754 
755  Mainly for debug purposes
756 
757  :param mdl_lig_idx: Index of model ligand for which report should be
758  generated
759  :type mdl_lig_idx: :class:`int`
760  """
761  return self._get_report_get_report(self.model_ligand_statesmodel_ligand_states[mdl_lig_idx],
762  self.state_matrixstate_matrix[:, mdl_lig_idx])
763 
764  def _get_report(self, ligand_state, pair_states):
765  """ Helper
766  """
767  pair_report = list()
768  for s in np.unique(pair_states):
769  desc = self.state_decodingstate_decoding[s]
770  indices = np.flatnonzero(pair_states == s).tolist()
771  pair_report.append({"state": s,
772  "short desc": desc[0],
773  "desc": desc[1],
774  "indices": indices})
775 
776  desc = self.state_decodingstate_decoding[ligand_state]
777  ligand_report = {"state": ligand_state,
778  "short desc": desc[0],
779  "desc": desc[1]}
780 
781  return (ligand_report, pair_report)
782 
784  """ Makes an educated guess why target ligand is not assigned
785 
786  This either returns actual error states or custom states that are
787  derived from them. Currently, the following reasons are reported:
788 
789  * `no_ligand`: there was no ligand in the model.
790  * `disconnected`: the ligand graph was disconnected.
791  * `identity`: the ligand was not found in the model (by graph
792  isomorphism). Check your ligand connectivity.
793  * `no_iso`: no full isomorphic match could be found. Try enabling
794  `substructure_match=True` if the target ligand is incomplete.
795  * `symmetries`: too many symmetries were found (by graph isomorphisms).
796  Try to increase `max_symmetries`.
797  * `stoichiometry`: there was a possible assignment in the model, but
798  the model ligand was already assigned to a different target ligand.
799  This indicates different stoichiometries.
800  * `no_contact` (LDDT-PLI only): There were no LDDT contacts between
801  the binding site and the ligand, and LDDT-PLI is undefined.
802  * `target_binding_site` (SCRMSD only): no polymer residues were in
803  proximity of the target ligand.
804  * `model_binding_site` (SCRMSD only): the binding site was not found
805  in the model. Either the binding site was not modeled or the model
806  ligand was positioned too far in combination with
807  `full_bs_search=False`.
808 
809  :param trg_lig_idx: Index of target ligand
810  :type trg_lig_idx: :class:`int`
811  :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
812  sentence describing the issue, (\"unknown\",\"unknown\") if
813  nothing obvious can be found.
814  :raises: :class:`RuntimeError` if specified target ligand is assigned
815  """
816  if trg_lig_idx not in self.unassigned_target_ligandsunassigned_target_ligands:
817  raise RuntimeError("Specified target ligand is not unassigned")
818 
819  # hardcoded tuple if there is simply nothing we can assign it to
820  if len(self.model_ligandsmodel_ligands) == 0:
821  return ("no_ligand", "No ligand in the model")
822 
823  # if something with the ligand itself is wrong, we can be pretty sure
824  # thats why the ligand is unassigned
825  if self.target_ligand_statestarget_ligand_states[trg_lig_idx] != 0:
826  return self.state_decodingstate_decoding[self.target_ligand_statestarget_ligand_states[trg_lig_idx]]
827 
828  # The next best guess comes from looking at pair states
829  tmp = np.unique(self.state_matrixstate_matrix[trg_lig_idx,:])
830 
831  # In case of any 0, it could have been assigned so it's probably
832  # just not selected due to different stoichiometry - this is no
833  # defined state, we just return a hardcoded tuple in this case
834  if 0 in tmp:
835  return ("stoichiometry",
836  "Ligand was already assigned to an other "
837  "model ligand (different stoichiometry)")
838 
839  # maybe it's a symmetry issue?
840  if 2 in tmp:
841  return self.state_decodingstate_decoding[2]
842 
843  # if the state is 6 (single_ligand_issue), there is an issue with its
844  # target counterpart.
845  if 6 in tmp:
846  mdl_idx = np.where(self.state_matrixstate_matrix[trg_lig_idx,:]==6)[0]
847  for i in mdl_idx:
848  if self.model_ligand_statesmodel_ligand_states[i] == 0:
849  raise RuntimeError("This should never happen")
850  if self.model_ligand_statesmodel_ligand_states[i] != 4 or len(tmp) == 1:
851  # Don't report disconnected if only 1 model ligand is
852  # disconnected, unless that's the only reason
853  return self.state_decodingstate_decoding[self.model_ligand_statesmodel_ligand_states[i]]
854 
855  # get rid of remaining single ligand issues (only disconnected error)
856  if 6 in tmp and len(tmp) > 1:
857  tmp = tmp[tmp!=6]
858 
859  # prefer everything over identity state
860  if 1 in tmp and len(tmp) > 1:
861  tmp = tmp[tmp!=1]
862 
863  # just return whatever is left
864  return self.state_decodingstate_decoding[tmp[0]]
865 
866 
867  def guess_model_ligand_unassigned_reason(self, mdl_lig_idx):
868  """ Makes an educated guess why model ligand is not assigned
869 
870  This either returns actual error states or custom states that are
871  derived from them. Currently, the following reasons are reported:
872 
873  * `no_ligand`: there was no ligand in the target.
874  * `disconnected`: the ligand graph is disconnected.
875  * `identity`: the ligand was not found in the target (by graph or
876  subgraph isomorphism). Check your ligand connectivity.
877  * `no_iso`: no full isomorphic match could be found. Try enabling
878  `substructure_match=True` if the target ligand is incomplete.
879  * `symmetries`: too many symmetries were found (by graph isomorphisms).
880  Try to increase `max_symmetries`.
881  * `stoichiometry`: there was a possible assignment in the target, but
882  the model target was already assigned to a different model ligand.
883  This indicates different stoichiometries.
884  * `no_contact` (LDDT-PLI only): There were no LDDT contacts between
885  the binding site and the ligand, and LDDT-PLI is undefined.
886  * `target_binding_site` (SCRMSD only): a potential assignment was found
887  in the target, but there were no polymer residues in proximity of the
888  ligand in the target.
889  * `model_binding_site` (SCRMSD only): a potential assignment was
890  found in the target, but no binding site was found in the model.
891  Either the binding site was not modeled or the model ligand was
892  positioned too far in combination with `full_bs_search=False`.
893 
894  :param mdl_lig_idx: Index of model ligand
895  :type mdl_lig_idx: :class:`int`
896  :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
897  sentence describing the issue, (\"unknown\",\"unknown\") if
898  nothing obvious can be found.
899  :raises: :class:`RuntimeError` if specified model ligand is assigned
900  """
901  if mdl_lig_idx not in self.unassigned_model_ligandsunassigned_model_ligands:
902  raise RuntimeError("Specified model ligand is not unassigned")
903 
904  # hardcoded tuple if there is simply nothing we can assign it to
905  if len(self.target_ligandstarget_ligands) == 0:
906  return ("no_ligand", "No ligand in the target")
907 
908  # if something with the ligand itself is wrong, we can be pretty sure
909  # thats why the ligand is unassigned
910  if self.model_ligand_statesmodel_ligand_states[mdl_lig_idx] != 0:
911  return self.state_decodingstate_decoding[self.model_ligand_statesmodel_ligand_states[mdl_lig_idx]]
912 
913  # The next best guess comes from looking at pair states
914  tmp = np.unique(self.state_matrixstate_matrix[:,mdl_lig_idx])
915 
916  # In case of any 0, it could have been assigned so it's probably
917  # just not selected due to different stoichiometry - this is no
918  # defined state, we just return a hardcoded tuple in this case
919  if 0 in tmp:
920  return ("stoichiometry",
921  "Ligand was already assigned to an other "
922  "model ligand (different stoichiometry)")
923 
924  # maybe its a symmetry issue?
925  if 2 in tmp:
926  return self.state_decodingstate_decoding[2]
927 
928  # if the state is 6 (single_ligand_issue), there is an issue with its
929  # target counterpart.
930  if 6 in tmp:
931  trg_idx = np.where(self.state_matrixstate_matrix[:,mdl_lig_idx]==6)[0]
932  for i in trg_idx:
933  if self.target_ligand_statestarget_ligand_states[i] == 0:
934  raise RuntimeError("This should never happen")
935  if self.target_ligand_statestarget_ligand_states[i] != 4 or len(tmp) == 1:
936  # Don't report disconnected if only 1 target ligand is
937  # disconnected, unless that's the only reason
938  return self.state_decodingstate_decoding[self.target_ligand_statestarget_ligand_states[i]]
939 
940  # get rid of remaining single ligand issues (only disconnected error)
941  if 6 in tmp and len(tmp) > 1:
942  tmp = tmp[tmp!=6]
943 
944  # prefer everything over identity state
945  if 1 in tmp and len(tmp) > 1:
946  tmp = tmp[tmp!=1]
947 
948  # just return whatever is left
949  return self.state_decodingstate_decoding[tmp[0]]
950 
951  @property
953  return_dict = dict()
954  for i in self.unassigned_model_ligandsunassigned_model_ligands:
955  lig = self.model_ligandsmodel_ligands[i]
956  cname = lig.GetChain().GetName()
957  rnum = lig.GetNumber()
958  if cname not in return_dict:
959  return_dict[cname] = dict()
960  return_dict[cname][rnum] = \
961  self.guess_model_ligand_unassigned_reasonguess_model_ligand_unassigned_reason(i)[0]
962  return return_dict
963 
964  @property
966  return_dict = dict()
967  for i in self.unassigned_target_ligandsunassigned_target_ligands:
968  lig = self.target_ligandstarget_ligands[i]
969  cname = lig.GetChain().GetName()
970  rnum = lig.GetNumber()
971  if cname not in return_dict:
972  return_dict[cname] = dict()
973  return_dict[cname][rnum] = \
974  self.guess_target_ligand_unassigned_reasonguess_target_ligand_unassigned_reason(i)[0]
975  return return_dict
976 
977  @property
978  def _chain_mapper(self):
979  """ Chain mapper object for the given :attr:`target`.
980 
981  Can be used by child classes if needed, constructed with
982  *resnum_alignments* flag
983 
984  :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
985  """
986  if self.__chain_mapper__chain_mapper is None:
987  with _SinkVerbosityLevel():
988  self.__chain_mapper__chain_mapper = \
990  n_max_naive=1e9,
991  resnum_alignments=self.resnum_alignmentsresnum_alignments,
992  min_pep_length=self.min_pep_lengthmin_pep_length,
993  min_nuc_length=self.min_nuc_lengthmin_nuc_length,
994  pep_seqid_thr=self.pep_seqid_thrpep_seqid_thr,
995  nuc_seqid_thr=self.nuc_seqid_thrnuc_seqid_thr,
996  mdl_map_pep_seqid_thr=self.mdl_map_pep_seqid_thrmdl_map_pep_seqid_thr,
997  mdl_map_nuc_seqid_thr=self.mdl_map_nuc_seqid_thrmdl_map_nuc_seqid_thr,
998  seqres = self.seqresseqres,
999  trg_seqres_mapping = self.trg_seqres_mappingtrg_seqres_mapping)
1000  return self.__chain_mapper__chain_mapper
1001 
1002  @property
1003  def _chem_mapping(self):
1004  if self.__chem_mapping__chem_mapping is None:
1005  self.__chem_mapping__chem_mapping, self.__chem_group_alns__chem_group_alns, \
1006  self.__mdl_chains_without_chem_mapping__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl__chain_mapping_mdl = \
1007  self._chain_mapper_chain_mapper.GetChemMapping(self.modelmodel)
1008  return self.__chem_mapping__chem_mapping
1009 
1010  @property
1011  def _chem_group_alns(self):
1012  if self.__chem_group_alns__chem_group_alns is None:
1013  self.__chem_mapping__chem_mapping, self.__chem_group_alns__chem_group_alns, \
1014  self.__mdl_chains_without_chem_mapping__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl__chain_mapping_mdl = \
1015  self._chain_mapper_chain_mapper.GetChemMapping(self.modelmodel)
1016  return self.__chem_group_alns__chem_group_alns
1017 
1018  @property
1019  def _ref_mdl_alns(self):
1020  if self.__ref_mdl_alns__ref_mdl_alns is None:
1021  self.__ref_mdl_alns__ref_mdl_alns = \
1022  chain_mapping._GetRefMdlAlns(self._chain_mapper_chain_mapper.chem_groups,
1023  self._chain_mapper_chain_mapper.chem_group_alignments,
1024  self._chem_mapping_chem_mapping,
1025  self._chem_group_alns_chem_group_alns)
1026  return self.__ref_mdl_alns__ref_mdl_alns
1027 
1028  @property
1029  def _chain_mapping_mdl(self):
1030  if self.__chain_mapping_mdl__chain_mapping_mdl is None:
1031  with _SinkVerbosityLevel():
1032  self.__chem_mapping__chem_mapping, self.__chem_group_alns__chem_group_alns, \
1033  self.__mdl_chains_without_chem_mapping__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl__chain_mapping_mdl = \
1034  self._chain_mapper_chain_mapper.GetChemMapping(self.modelmodel)
1035  return self.__chain_mapping_mdl__chain_mapping_mdl
1036 
1037  @property
1038  def _mdl_chains_without_chem_mapping(self):
1039  if self.__mdl_chains_without_chem_mapping__mdl_chains_without_chem_mapping is None:
1040  self.__chem_mapping__chem_mapping, self.__chem_group_alns__chem_group_alns, \
1041  self.__mdl_chains_without_chem_mapping__mdl_chains_without_chem_mapping, self.__chain_mapping_mdl__chain_mapping_mdl = \
1042  self._chain_mapper_chain_mapper.GetChemMapping(self.modelmodel)
1043  return self.__mdl_chains_without_chem_mapping__mdl_chains_without_chem_mapping
1044 
1045  def _compute_scores(self):
1046  """
1047  Compute score for every possible target-model ligand pair and store the
1048  result in internal matrices.
1049  """
1050 
1053  shape = (len(self.target_ligandstarget_ligands), len(self.model_ligandsmodel_ligands))
1054  self._score_matrix_score_matrix = np.full(shape, np.nan, dtype=np.float32)
1055  self._coverage_matrix_coverage_matrix = np.full(shape, np.nan, dtype=np.float32)
1056  self._state_matrix_state_matrix = np.full(shape, 0, dtype=np.int32)
1057  self._model_ligand_states_model_ligand_states = np.zeros(len(self.model_ligandsmodel_ligands))
1058  self._target_ligand_states_target_ligand_states = np.zeros(len(self.target_ligandstarget_ligands))
1059  self._aux_matrix_aux_matrix = np.empty(shape, dtype=dict)
1060 
1061  # precompute ligand graphs
1062  target_graphs = \
1063  [_ResidueToGraph(l, by_atom_index=True) for l in self.target_ligandstarget_ligands]
1064  model_graphs = \
1065  [_ResidueToGraph(l, by_atom_index=True) for l in self.model_ligandsmodel_ligands]
1066 
1067  for g_idx, g in enumerate(target_graphs):
1068  if not networkx.is_connected(g):
1069  self._target_ligand_states_target_ligand_states[g_idx] = 4
1070  self._state_matrix_state_matrix[g_idx,:] = 6
1071  msg = "Disconnected graph observed for target ligand "
1072  msg += str(self.target_ligandstarget_ligands[g_idx])
1073  LogWarning(msg)
1074 
1075  for g_idx, g in enumerate(model_graphs):
1076  if not networkx.is_connected(g):
1077  self._model_ligand_states_model_ligand_states[g_idx] = 4
1078  self._state_matrix_state_matrix[:,g_idx] = 6
1079  msg = "Disconnected graph observed for model ligand "
1080  msg += str(self.model_ligandsmodel_ligands[g_idx])
1081  LogWarning(msg)
1082 
1083 
1084  LogScript("Computing pairwise scores for all %s x %s ligands" % shape)
1085  for target_id, target_ligand in enumerate(self.target_ligandstarget_ligands):
1086  LogInfo("Analyzing target ligand %s" % target_ligand)
1087 
1088  if self._target_ligand_states_target_ligand_states[target_id] == 4:
1089  # Disconnected graph - already updated states and reported
1090  # to LogVerbose
1091  continue
1092 
1093  for model_id, model_ligand in enumerate(self.model_ligandsmodel_ligands):
1094  LogInfo("Comparing to model ligand %s" % model_ligand)
1095 
1096 
1099 
1100  if self._model_ligand_states_model_ligand_states[model_id] == 4:
1101  # Disconnected graph - already updated states and reported
1102  # to LogVerbose
1103  continue
1104 
1105  try:
1106  symmetries = ComputeSymmetries(
1107  model_ligand, target_ligand,
1108  substructure_match=self.substructure_matchsubstructure_match,
1109  by_atom_index=True,
1110  max_symmetries=self.max_symmetriesmax_symmetries,
1111  model_graph = model_graphs[model_id],
1112  target_graph = target_graphs[target_id])
1113  LogInfo("Ligands %s and %s symmetry match" % (
1114  str(model_ligand), str(target_ligand)))
1115  except NoSymmetryError:
1116  # Ligands are different - skip
1117  LogInfo("No symmetry between %s and %s" % (
1118  str(model_ligand), str(target_ligand)))
1119  self._state_matrix_state_matrix[target_id, model_id] = 1
1120  continue
1121  except TooManySymmetriesError:
1122  # Ligands are too symmetrical - skip
1123  LogWarning("Too many symmetries between %s and %s" % (
1124  str(model_ligand), str(target_ligand)))
1125  self._state_matrix_state_matrix[target_id, model_id] = 2
1126  continue
1127  except NoIsomorphicSymmetryError:
1128  # Ligands are different - skip
1129  LogInfo("No isomorphic symmetry between %s and %s" % (
1130  str(model_ligand), str(target_ligand)))
1131  self._state_matrix_state_matrix[target_id, model_id] = 3
1132  continue
1133  except DisconnectedGraphError:
1134  # this should never happen as we guard against
1135  # DisconnectedGraphError when precomputing the graph
1136  LogError("Disconnected graph observed for %s and %s" % (
1137  str(model_ligand), str(target_ligand)))
1138  # just set both ligand states to 4
1139  self._model_ligand_states_model_ligand_states[model_id] = 4
1140  self._model_ligand_states_model_ligand_states[target_id] = 4
1141  self._state_matrix_state_matrix[target_id, model_id] = 6
1142  continue
1143 
1144 
1147  score, pair_state, target_ligand_state, model_ligand_state, aux\
1148  = self._compute_compute(symmetries, target_ligand, model_ligand)
1149 
1150 
1153 
1154  # Ensure that returned states are associated with a
1155  # description. This is a requirement when subclassing
1156  # LigandScorer => state_decoding dict from base class must
1157  # be modified in subclass constructor
1158  if pair_state not in self.state_decodingstate_decoding or \
1159  target_ligand_state not in self.state_decodingstate_decoding or \
1160  model_ligand_state not in self.state_decodingstate_decoding:
1161  raise RuntimeError(f"Subclass returned state "
1162  f"\"{state}\" for which no "
1163  f"description is available. Point "
1164  f"the developer of the used scorer "
1165  f"to this error message.")
1166 
1167  # if any of the ligand states is non-zero, this must be marked
1168  # by the scorer subclass by specifying a specific pair state
1169  if target_ligand_state != 0 and pair_state != 6:
1170  raise RuntimeError("Observed non-zero target-ligand "
1171  "state which must trigger certain "
1172  "pair state. Point the developer of "
1173  "the used scorer to this error message")
1174 
1175  if model_ligand_state != 0 and pair_state != 6:
1176  raise RuntimeError("Observed non-zero model-ligand "
1177  "state which must trigger certain "
1178  "pair state. Point the developer of "
1179  "the used scorer to this error message")
1180 
1181  self._state_matrix_state_matrix[target_id, model_id] = pair_state
1182  self._target_ligand_states_target_ligand_states[target_id] = target_ligand_state
1183  self._model_ligand_states_model_ligand_states[model_id] = model_ligand_state
1184  if pair_state == 0:
1185  if score is None or np.isnan(score):
1186  raise RuntimeError("LigandScorer returned invalid "
1187  "score despite 0 error state")
1188  # it's a valid score!
1189  self._score_matrix_score_matrix[target_id, model_id] = score
1190  cvg = len(symmetries[0][0]) / len(model_ligand.atoms)
1191  self._coverage_matrix_coverage_matrix[target_id, model_id] = cvg
1192  self._aux_matrix_aux_matrix[target_id, model_id] = aux
1193 
1194  def _compute(self, symmetries, target_ligand, model_ligand):
1195  """ Compute score for specified ligand pair - defined by child class
1196 
1197  Raises :class:`NotImplementedError` if not implemented by child class.
1198 
1199  :param symmetries: Defines symmetries between *target_ligand* and
1200  *model_ligand*. Return value of
1201  :func:`ComputeSymmetries`
1202  :type symmetries: :class:`list` of :class:`tuple` with two elements
1203  each: 1) :class:`list` of atom indices in
1204  *target_ligand* 2) :class:`list` of respective atom
1205  indices in *model_ligand*
1206  :param target_ligand: The target ligand
1207  :type target_ligand: :class:`ost.mol.ResidueHandle` or
1208  :class:`ost.mol.ResidueView`
1209  :param model_ligand: The model ligand
1210  :type model_ligand: :class:`ost.mol.ResidueHandle` or
1211  :class:`ost.mol.ResidueView`
1212 
1213  :returns: A :class:`tuple` with three elements: 1) a score
1214  (:class:`float`) 2) state (:class:`int`).
1215  3) auxiliary data for this ligand pair (:class:`dict`).
1216  If state is 0, the score and auxiliary data will be
1217  added to :attr:`~score_matrix` and :attr:`~aux_matrix` as well
1218  as the respective value in :attr:`~coverage_matrix`.
1219  Returned score must be valid in this case (not None/NaN).
1220  Child specific non-zero states must be >= 10.
1221  """
1222  raise NotImplementedError("_compute must be implemented by child "
1223  "class")
1224 
1225  def _score_dir(self):
1226  """ Return direction of score - defined by child class
1227 
1228  Relevant for ligand assignment. Must return a string in ['+', '-'].
1229  '+' for ascending scores, i.e. higher is better (lddt etc.)
1230  '-' for descending scores, i.e. lower is better (rmsd etc.)
1231  """
1232  raise NotImplementedError("_score_dir must be implemented by child "
1233  "class")
1234 
1235  def _copy_ligand(self, l, ent, ed, rename_ligand_chain):
1236  """ Copies ligand into entity and returns residue handle
1237  """
1238  new_chain = ent.FindChain(l.chain.name)
1239  if not new_chain.IsValid():
1240  new_chain = ed.InsertChain(l.chain.name)
1241  ed.SetChainType(new_chain, mol.ChainType.CHAINTYPE_NON_POLY)
1242  else:
1243  # Does a residue with the same name already exist?
1244  already_exists = new_chain.FindResidue(l.number).IsValid()
1245  if already_exists:
1246  if rename_ligand_chain:
1247  chain_ext = 2 # Extend the chain name by this
1248  while True:
1249  new_chain_name = \
1250  l.chain.name + "_" + str(chain_ext)
1251  new_chain = ent.FindChain(new_chain_name)
1252  if new_chain.IsValid():
1253  chain_ext += 1
1254  continue
1255  else:
1256  new_chain = \
1257  ed.InsertChain(new_chain_name)
1258  break
1259  LogInfo("Moved ligand residue %s to new chain %s" % (
1260  l.qualified_name, new_chain.name))
1261  else:
1262  msg = \
1263  "A residue number %s already exists in chain %s" % (
1264  l.number, l.chain.name)
1265  raise RuntimeError(msg)
1266 
1267  # Add the residue with its original residue number
1268  new_res = ed.AppendResidue(new_chain, l.name, l.number)
1269  # Add atoms
1270  for old_atom in l.atoms:
1271  ed.InsertAtom(new_res, old_atom.name, old_atom.pos,
1272  element=old_atom.element, occupancy=old_atom.occupancy,
1273  b_factor=old_atom.b_factor, is_hetatm=old_atom.is_hetatom)
1274  # Add bonds
1275  for old_atom in l.atoms:
1276  for old_bond in old_atom.bonds:
1277  new_first = new_res.FindAtom(old_bond.first.name)
1278  new_second = new_res.FindAtom(old_bond.second.name)
1279  ed.Connect(new_first, new_second, old_bond.bond_order)
1280  new_res.SetIsLigand(True)
1281  return new_res
1282 
1283  def _cleanup_polymer_ent(self, ent, clib):
1284  """ In principle molck light but logs LigandScorer specific warnings
1285 
1286  Only to be applied to polymer entity
1287 
1288  1) removes atoms with elements set to H or D (not logged as there is no
1289  effect on scoring)
1290  2) removes residues with no entry in component dictionary
1291  3) removes all residues that are not peptide_liking or
1292  nucleotide_linking according component dictionary
1293  4) removes unknown atoms according to component dictionary
1294  5) reruns processor
1295  """
1296 
1297  cleanup_log = {"cleaned_residues": {"no_clib": [],
1298  "not_linking": []},
1299  "cleaned_atoms": {"unknown_atoms": []}}
1300 
1301  # 1)
1302  hydrogen_sel = ent.Select("ele == H or ele == D")
1303  if hydrogen_sel.GetAtomCount() > 0:
1304  # just do, no logging as it has no effect on scoring
1305  ent = ost.mol.CreateEntityFromView(ent.Select(
1306  "ele != H and ele != D"), include_exlusive_atoms=False)
1307 
1308  # extract residues/atoms for 2), 3) and 4)
1309  not_in_clib = list()
1310  not_linking = list()
1311  unknown_atom = list()
1312 
1313  for r in ent.residues:
1314  comp = clib.FindCompound(r.GetName())
1315  if comp is None:
1316  not_in_clib.append(r)
1317  continue
1318  if not (comp.IsPeptideLinking() or comp.IsNucleotideLinking()):
1319  not_linking.append(r)
1320  continue
1321  comp_anames = set([a.name for a in comp.atom_specs])
1322  for a in r.atoms:
1323  if a.name not in comp_anames:
1324  unknown_atom.append(a)
1325 
1326  # 2)
1327  if len(not_in_clib) > 0:
1328  cleanup_log["cleaned_residues"]["no_clib"] = \
1329  [_QualifiedResidueNotation(r) for r in not_in_clib]
1330  msg = ("Expect all residues in receptor structure to be defined in "
1331  "compound library but this is not the case. "
1332  "Please refer to the OpenStructure website if an updated "
1333  "library is sufficient to solve the problem: "
1334  "https://openstructure.org/docs/conop/compoundlib/ "
1335  "These residues will not be considered for scoring (which "
1336  "may also affect pre-processing steps such as alignments): ")
1337  msg += ','.join(cleanup_log["cleaned_residues"]["no_clib"])
1338  ost.LogWarning(msg)
1339  for r in not_in_clib:
1340  r.SetIntProp("clib", 1)
1341  ent = ost.mol.CreateEntityFromView(ent.Select("grclib:0!=1"),
1342  include_exlusive_atoms=False)
1343 
1344  # 3)
1345  if len(not_linking) > 0:
1346  cleanup_log["cleaned_residues"]["not_linking"] = \
1347  [_QualifiedResidueNotation(r) for r in not_linking]
1348  msg = ("Expect all residues in receptor structure to be peptide "
1349  "linking or nucleotide linking according to the compound "
1350  "library but this is not the case. "
1351  "Please refer to the OpenStructure website if an updated "
1352  "library is sufficient to solve the problem: "
1353  "https://openstructure.org/docs/conop/compoundlib/ "
1354  "These residues will not be considered for scoring (which "
1355  "may also affect pre-processing steps such as alignments): ")
1356  msg += ','.join(cleanup_log["cleaned_residues"]["not_linking"])
1357  ost.LogWarning(msg)
1358  for r in not_linking:
1359  r.SetIntProp("linking", 1)
1360  ent = ost.mol.CreateEntityFromView(ent.Select("grlinking:0!=1"),
1361  include_exlusive_atoms=False)
1362 
1363  # 4)
1364  if len(unknown_atom) > 0:
1365  cleanup_log["cleaned_atoms"]["unknown_atoms"] = \
1366  [_QualifiedAtomNotation(a) for a in unknown_atom]
1367  msg = ("Expect atom names according to the compound library but "
1368  "this is not the case."
1369  "Please refer to the OpenStructure website if an updated "
1370  "library is sufficient to solve the problem: "
1371  "https://openstructure.org/docs/conop/compoundlib/ "
1372  "These atoms will not be considered for scoring: ")
1373  msg += ','.join(cleanup_log["cleaned_atoms"]["unknown_atoms"])
1374  ost.LogWarning(msg)
1375  for a in unknown_atom:
1376  a.SetIntProp("unknown", 1)
1377  ent = ost.mol.CreateEntityFromView(ent.Select("gaunknown:0!=1"),
1378  include_exlusive_atoms=False)
1379 
1380  # 5)
1381  processor = conop.RuleBasedProcessor(clib)
1382  processor.Process(ent)
1383 
1384  return (ent, cleanup_log)
1385 
1386 
1387 def _ResidueToGraph(residue, by_atom_index=False):
1388  """Return a NetworkX graph representation of the residue.
1389 
1390  :param residue: the residue from which to derive the graph
1391  :type residue: :class:`ost.mol.ResidueHandle` or
1392  :class:`ost.mol.ResidueView`
1393  :param by_atom_index: Set this parameter to True if you need the nodes to
1394  be labeled by atom index (within the residue).
1395  Otherwise, if False, the nodes will be labeled by
1396  atom names.
1397  :type by_atom_index: :class:`bool`
1398  :rtype: :class:`~networkx.classes.graph.Graph`
1399 
1400  Nodes are labeled with the Atom's uppercase
1401  :attr:`~ost.mol.AtomHandle.element`.
1402  """
1403  nxg = networkx.Graph()
1404 
1405  for atom in residue.atoms:
1406  nxg.add_node(atom.name, element=atom.element.upper())
1407 
1408  # This will list all edges twice - once for every atom of the pair.
1409  # But as of NetworkX 3.0 adding the same edge twice has no effect,
1410  # so we're good.
1411  nxg.add_edges_from([(
1412  b.first.name,
1413  b.second.name) for a in residue.atoms for b in a.GetBondList()])
1414 
1415  if by_atom_index:
1416  nxg = networkx.relabel_nodes(nxg,
1417  {a: b for a, b in zip(
1418  [a.name for a in residue.atoms],
1419  range(len(residue.atoms)))},
1420  True)
1421  return nxg
1422 
1423 def ComputeSymmetries(model_ligand, target_ligand, substructure_match=False,
1424  by_atom_index=False, return_symmetries=True,
1425  max_symmetries=1e6, model_graph = None,
1426  target_graph = None):
1427  """Return a list of symmetries (isomorphisms) of the model onto the target
1428  residues.
1429 
1430  :param model_ligand: The model ligand
1431  :type model_ligand: :class:`ost.mol.ResidueHandle` or
1432  :class:`ost.mol.ResidueView`
1433  :param target_ligand: The target ligand
1434  :type target_ligand: :class:`ost.mol.ResidueHandle` or
1435  :class:`ost.mol.ResidueView`
1436  :param substructure_match: Set this to True to allow partial ligands
1437  in the reference.
1438  :type substructure_match: :class:`bool`
1439  :param by_atom_index: Set this parameter to True if you need the symmetries
1440  to refer to atom index (within the residue).
1441  Otherwise, if False, the symmetries refer to atom
1442  names.
1443  :type by_atom_index: :class:`bool`
1444  :type return_symmetries: If Truthy, return the mappings, otherwise simply
1445  return True if a mapping is found (and raise if
1446  no mapping is found). This is useful to quickly
1447  find out if a mapping exist without the expensive
1448  step to find all the mappings.
1449  :type return_symmetries: :class:`bool`
1450  :param max_symmetries: If more than that many isomorphisms exist, raise
1451  a :class:`TooManySymmetriesError`. This can only be assessed by
1452  generating at least that many isomorphisms and can take some time.
1453  :type max_symmetries: :class:`int`
1454  :raises: :class:`NoSymmetryError` when no symmetry can be found;
1455  :class:`NoIsomorphicSymmetryError` in case of isomorphic
1456  subgraph but *substructure_match* is False;
1457  :class:`TooManySymmetriesError` when more than `max_symmetries`
1458  isomorphisms are found; :class:`DisconnectedGraphError` if
1459  graph for *model_ligand*/*target_ligand* is disconnected.
1460  """
1461 
1462  # Get the Graphs of the ligands
1463  if model_graph is None:
1464  model_graph = _ResidueToGraph(model_ligand,
1465  by_atom_index=by_atom_index)
1466 
1467  if not networkx.is_connected(model_graph):
1468  msg = "Disconnected graph for model ligand %s" % model_ligand
1469  raise DisconnectedGraphError(msg)
1470 
1471 
1472  if target_graph is None:
1473  target_graph = _ResidueToGraph(target_ligand,
1474  by_atom_index=by_atom_index)
1475 
1476  if not networkx.is_connected(target_graph):
1477  msg = "Disconnected graph for target ligand %s" % target_ligand
1478  raise DisconnectedGraphError(msg)
1479 
1480  # Note the argument order (model, target) which differs from spyrmsd.
1481  # This is because a subgraph of model is isomorphic to target - but not the
1482  # opposite as we only consider partial ligands in the reference.
1483  # Make sure to generate the symmetries correctly in the end
1484  gm = networkx.algorithms.isomorphism.GraphMatcher(
1485  model_graph, target_graph, node_match=lambda x, y:
1486  x["element"] == y["element"])
1487  if gm.is_isomorphic():
1488  if not return_symmetries:
1489  return True
1490  symmetries = []
1491  for i, isomorphism in enumerate(gm.isomorphisms_iter()):
1492  if i >= max_symmetries:
1493  raise TooManySymmetriesError(
1494  "Too many symmetries between %s and %s" % (
1495  str(model_ligand), str(target_ligand)))
1496  symmetries.append((list(isomorphism.values()),
1497  list(isomorphism.keys())))
1498  assert len(symmetries) > 0
1499  LogVerbose("Found %s isomorphic mappings (symmetries)" % len(symmetries))
1500  elif gm.subgraph_is_isomorphic() and substructure_match:
1501  if not return_symmetries:
1502  return True
1503  symmetries = []
1504  for i, isomorphism in enumerate(gm.subgraph_isomorphisms_iter()):
1505  if i >= max_symmetries:
1506  raise TooManySymmetriesError(
1507  "Too many symmetries between %s and %s" % (
1508  str(model_ligand), str(target_ligand)))
1509  symmetries.append((list(isomorphism.values()),
1510  list(isomorphism.keys())))
1511  assert len(symmetries) > 0
1512  # Assert that all the atoms in the target are part of the substructure
1513  assert len(symmetries[0][0]) == len(target_ligand.atoms)
1514  n_sym = len(symmetries)
1515  LogVerbose("Found %s subgraph isomorphisms (symmetries)" % n_sym)
1516  elif gm.subgraph_is_isomorphic():
1517  LogVerbose("Found subgraph isomorphisms (symmetries), but"
1518  " ignoring because substructure_match=False")
1519  raise NoIsomorphicSymmetryError("No symmetry between %s and %s" % (
1520  str(model_ligand), str(target_ligand)))
1521  else:
1522  LogVerbose("Found no isomorphic mappings (symmetries)")
1523  raise NoSymmetryError("No symmetry between %s and %s" % (
1524  str(model_ligand), str(target_ligand)))
1525 
1526  return symmetries
1527 
1528 class NoSymmetryError(ValueError):
1529  """ Exception raised when no symmetry can be found.
1530  """
1531  pass
1532 
1533 class NoIsomorphicSymmetryError(ValueError):
1534  """ Exception raised when no isomorphic symmetry can be found
1535 
1536  There would be isomorphic subgraphs for which symmetries can
1537  be found, but substructure_match is disabled
1538  """
1539  pass
1540 
1541 class TooManySymmetriesError(ValueError):
1542  """ Exception raised when too many symmetries are found.
1543  """
1544  pass
1545 
1546 class DisconnectedGraphError(Exception):
1547  """ Exception raised when the ligand graph is disconnected.
1548  """
1549  pass
1550 
1551 # specify public interface
1552 __all__ = ('LigandScorer', 'ComputeSymmetries', 'NoSymmetryError',
1553  'NoIsomorphicSymmetryError', 'TooManySymmetriesError',
1554  'DisconnectedGraphError')
Protein or molecule.
definition of EntityView
Definition: entity_view.hh:86
def _copy_ligand(self, l, ent, ed, rename_ligand_chain)
def __init__(self, model, target, model_ligands, target_ligands, resnum_alignments=False, substructure_match=False, coverage_delta=0.2, max_symmetries=1e5, rename_ligand_chain=False, min_pep_length=6, min_nuc_length=4, pep_seqid_thr=95., nuc_seqid_thr=95., mdl_map_pep_seqid_thr=0., mdl_map_nuc_seqid_thr=0., seqres=None, trg_seqres_mapping=None)
def _compute(self, symmetries, target_ligand, model_ligand)
def _get_report(self, ligand_state, pair_states)
def PopVerbosityLevel()
Definition: __init__.py:232
def PushVerbosityLevel(value)
Definition: __init__.py:229
def ComputeSymmetries(model_ligand, target_ligand, substructure_match=False, by_atom_index=False, return_symmetries=True, max_symmetries=1e6, model_graph=None, target_graph=None)
EntityHandle DLLEXPORT_OST_MOL CreateEntityFromView(const EntityView &view, bool include_exlusive_atoms, EntityHandle handle=EntityHandle())
create new entity handle from entity view