OpenStructure
ligand_scoring_base.py
Go to the documentation of this file.
1 from contextlib import contextmanager
2 import numpy as np
3 import networkx
4 
5 from ost import mol
6 from ost import LogWarning, LogScript, LogInfo, LogVerbose, LogDebug, GetVerbosityLevel, PushVerbosityLevel, PopVerbosityLevel
7 from ost.mol.alg import chain_mapping
8 
9 
10 @contextmanager
11 def _SinkVerbosityLevel(n=1):
12  """ Context manager to temporarily reduce the verbosity level by n.
13 
14  Example usage:
15  with _SinkVerbosityLevel(2):
16  LogVerbose("Test")
17  Will only write "Test" in script level (2 above)
18  """
19  PushVerbosityLevel(GetVerbosityLevel() - n)
20  try:
21  yield
22  except:
23  raise
24  finally:
26 
27 
29  """ Scorer to compute various small molecule ligand (non polymer) scores.
30 
31  .. note ::
32  Extra requirements:
33 
34  - Python modules `numpy` and `networkx` must be available
35  (e.g. use ``pip install numpy networkx``)
36 
37  :class:`LigandScorer` is an abstract base class dealing with all the setup,
38  data storage, enumerating ligand symmetries and target/model ligand
39  matching/assignment. But actual score computation is delegated to child
40  classes.
41 
42  At the moment, two such classes are available:
43 
44  * :class:`ost.mol.alg.ligand_scoring_lddtpli.LDDTPLIScorer`
45  that assesses the conservation of protein-ligand
46  contacts (lDDT-PLI);
47  * :class:`ost.mol.alg.ligand_scoring_scrmsd.SCRMSDScorer`
48  that computes a binding-site superposed, symmetry-corrected RMSD
49  (BiSyRMSD) and ligand pocket lDDT (lDDT-LP).
50 
51  All versus all scores are available through the lazily computed
52  :attr:`score_matrix`. However, many things can go wrong... be it even
53  something as simple as two ligands not matching. Error states therefore
54  encode scoring issues. An Issue for a particular ligand is indicated by a
55  non-zero state in :attr:`model_ligand_states`/:attr:`target_ligand_states`.
56  This invalidates pairwise scores of such a ligand with all other ligands.
57  This and other issues in pairwise score computation are reported in
58  :attr:`state_matrix` which has the same size as :attr:`score_matrix`.
59  Only if the respective location is 0, a valid pairwise score can be
60  expected. The states and their meaning can be explored with code::
61 
62  for state_code, (short_desc, desc) in scorer_obj.state_decoding.items():
63  print(state_code)
64  print(short_desc)
65  print(desc)
66 
67  A common use case is to derive a one-to-one mapping between ligands in
68  the model and the target for which :class:`LigandScorer` provides an
69  automated :attr:`assignment` procedure.
70  By default, only exact matches between target and model ligands are
71  considered. This is a problem when the target only contains a subset
72  of the expected atoms (for instance if atoms are missing in an
73  experimental structure, which often happens in the PDB). With
74  `substructure_match=True`, complete model ligands can be scored against
75  partial target ligands. One problem with this approach is that it is
76  very easy to find good matches to small, irrelevant ligands like EDO, CO2
77  or GOL. The assignment algorithm therefore considers the coverage,
78  expressed as the fraction of atoms of the model ligand atoms covered in the
79  target. Higher coverage matches are prioritized, but a match with a better
80  score will be preferred if it falls within a window of `coverage_delta`
81  (by default 0.2) of a worse-scoring match. As a result, for instance,
82  with a delta of 0.2, a low-score match with coverage 0.96 would be
83  preferred over a high-score match with coverage 0.70.
84 
85  Assumptions:
86 
87  :class:`LigandScorer` generally assumes that the
88  :attr:`~ost.mol.ResidueHandle.is_ligand` property is properly set on all
89  the ligand residues, and only ligand atoms. This is typically the case for
90  entities loaded from mmCIF (tested with mmCIF files from the PDB and
91  SWISS-MODEL). Legacy PDB files must contain `HET` headers (which is usually
92  the case for files downloaded from the PDB but not elsewhere).
93 
94  The class doesn't perform any cleanup of the provided structures.
95  It is up to the caller to ensure that the data is clean and suitable for
96  scoring. :ref:`Molck <molck>` should be used with extra
97  care, as many of the options (such as `rm_non_std` or `map_nonstd_res`) can
98  cause ligands to be removed from the structure. If cleanup with Molck is
99  needed, ligands should be kept aside and passed separately. Non-ligand
100  residues should be valid compounds with atom names following the naming
101  conventions of the component dictionary. Non-standard residues are
102  acceptable, and if the model contains a standard residue at that position,
103  only atoms with matching names will be considered.
104 
105  Unlike most of OpenStructure, this class does not assume that the ligands
106  (either in the model or the target) are part of the PDB component
107  dictionary. They may have arbitrary residue names. Residue names do not
108  have to match between the model and the target. Matching is based on
109  the calculation of isomorphisms which depend on the atom element name and
110  atom connectivity (bond order is ignored).
111  It is up to the caller to ensure that the connectivity of atoms is properly
112  set before passing any ligands to this class. Ligands with improper
113  connectivity will lead to bogus results.
114 
115  Note, however, that atom names should be unique within a residue (ie two
116  distinct atoms cannot have the same atom name).
117 
118  This only applies to the ligand. The rest of the model and target
119  structures (protein, nucleic acids) must still follow the usual rules and
120  contain only residues from the compound library.
121 
122  Although it isn't a requirement, hydrogen atoms should be removed from the
123  structures. Here is an example code snippet that will perform a reasonable
124  cleanup. Keep in mind that this is most likely not going to work as
125  expected with entities loaded from PDB files, as the `is_ligand` flag is
126  probably not set properly.
127 
128  Here is an example of how to use setup a scorer code::
129 
130  from ost.mol.alg.ligand_scoring_scrmsd import SCRMSDScorer
131  from ost.mol.alg import Molck, MolckSettings
132 
133  # Load data
134  # Structure model in PDB format, containing the receptor only
135  model = io.LoadPDB("path_to_model.pdb")
136  # Ligand model as SDF file
137  model_ligand = io.LoadEntity("path_to_ligand.sdf", format="sdf")
138  # Target loaded from mmCIF, containing the ligand
139  target = io.LoadMMCIF("path_to_target.cif")
140 
141  # Cleanup a copy of the structures
142  cleaned_model = model.Copy()
143  cleaned_target = target.Copy()
144  molck_settings = MolckSettings(rm_unk_atoms=True,
145  rm_non_std=False,
146  rm_hyd_atoms=True,
147  rm_oxt_atoms=False,
148  rm_zero_occ_atoms=False,
149  colored=False,
150  map_nonstd_res=False,
151  assign_elem=True)
152  Molck(cleaned_model, conop.GetDefaultLib(), molck_settings)
153  Molck(cleaned_target, conop.GetDefaultLib(), molck_settings)
154 
155  # Setup scorer object and compute lDDT-PLI
156  model_ligands = [model_ligand.Select("ele != H")]
157  sc = SCRMSDScorer(cleaned_model, cleaned_target, model_ligands)
158 
159  # Perform assignment and read respective scores
160  for lig_pair in sc.assignment:
161  trg_lig = sc.target_ligands[lig_pair[0]]
162  mdl_lig = sc.model_ligands[lig_pair[1]]
163  score = sc.score_matrix[lig_pair[0], lig_pair[1]]
164  print(f"Score for {trg_lig} and {mdl_lig}: {score}")
165 
166  :param model: Model structure - a deep copy is available as :attr:`model`.
167  No additional processing (ie. Molck), checks,
168  stereochemistry checks or sanitization is performed on the
169  input. Hydrogen atoms are kept.
170  :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
171  :param target: Target structure - a deep copy is available as
172  :attr:`target`. No additional processing (ie. Molck), checks
173  or sanitization is performed on the input. Hydrogen atoms are
174  kept.
175  :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
176  :param model_ligands: Model ligands, as a list of
177  :class:`~ost.mol.ResidueHandle` belonging to the model
178  entity. Can be instantiated with either a :class:list
179  of :class:`~ost.mol.ResidueHandle`/
180  :class:`ost.mol.ResidueView` or of
181  :class:`ost.mol.EntityHandle`/
182  :class:`ost.mol.EntityView`.
183  If `None`, ligands will be extracted based on the
184  :attr:`~ost.mol.ResidueHandle.is_ligand` flag (this is
185  normally set properly in entities loaded from mmCIF).
186  :type model_ligands: :class:`list`
187  :param target_ligands: Target ligands, as a list of
188  :class:`~ost.mol.ResidueHandle` belonging to the
189  target entity. Can be instantiated either a
190  :class:list of :class:`~ost.mol.ResidueHandle`/
191  :class:`ost.mol.ResidueView` or of
192  :class:`ost.mol.EntityHandle`/
193  :class:`ost.mol.EntityView` containing a single
194  residue each. If `None`, ligands will be extracted
195  based on the :attr:`~ost.mol.ResidueHandle.is_ligand`
196  flag (this is normally set properly in entities
197  loaded from mmCIF).
198  :type target_ligands: :class:`list`
199  :param resnum_alignments: Whether alignments between chemically equivalent
200  chains in *model* and *target* can be computed
201  based on residue numbers. This can be assumed in
202  benchmarking setups such as CAMEO/CASP.
203  :type resnum_alignments: :class:`bool`
204  :param rename_ligand_chain: If a residue with the same chain name and
205  residue number than an explicitly passed model
206  or target ligand exits in the structure,
207  and `rename_ligand_chain` is False, a
208  RuntimeError will be raised. If
209  `rename_ligand_chain` is True, the ligand will
210  be moved to a new chain instead, and the move
211  will be logged to the console with SCRIPT
212  level.
213  :type rename_ligand_chain: :class:`bool`
214  :param substructure_match: Set this to True to allow incomplete (i.e.
215  partially resolved) target ligands.
216  :type substructure_match: :class:`bool`
217  :param coverage_delta: the coverage delta for partial ligand assignment.
218  :type coverage_delta: :class:`float`
219  :param max_symmetries: If more than that many isomorphisms exist for
220  a target-ligand pair, it will be ignored and reported
221  as unassigned.
222  :type max_symmetries: :class:`int`
223  """
224 
225  def __init__(self, model, target, model_ligands=None, target_ligands=None,
226  resnum_alignments=False, rename_ligand_chain=False,
227  substructure_match=False, coverage_delta=0.2,
228  max_symmetries=1e5):
229 
230  if isinstance(model, mol.EntityView):
231  self.modelmodel = mol.CreateEntityFromView(model, False)
232  elif isinstance(model, mol.EntityHandle):
233  self.modelmodel = model.Copy()
234  else:
235  raise RuntimeError("model must be of type EntityView/EntityHandle")
236 
237  if isinstance(target, mol.EntityView):
238  self.targettarget = mol.CreateEntityFromView(target, False)
239  elif isinstance(target, mol.EntityHandle):
240  self.targettarget = target.Copy()
241  else:
242  raise RuntimeError("target must be of type EntityView/EntityHandle")
243 
244  # Extract ligands from target
245  if target_ligands is None:
246  self.target_ligandstarget_ligands = self._extract_ligands_extract_ligands(self.targettarget)
247  else:
248  self.target_ligandstarget_ligands = self._prepare_ligands_prepare_ligands(self.targettarget, target,
249  target_ligands,
250  rename_ligand_chain)
251  if len(self.target_ligandstarget_ligands) == 0:
252  LogWarning("No ligands in the target")
253 
254  # Extract ligands from model
255  if model_ligands is None:
256  self.model_ligandsmodel_ligands = self._extract_ligands_extract_ligands(self.modelmodel)
257  else:
258  self.model_ligandsmodel_ligands = self._prepare_ligands_prepare_ligands(self.modelmodel, model,
259  model_ligands,
260  rename_ligand_chain)
261  if len(self.model_ligandsmodel_ligands) == 0:
262  LogWarning("No ligands in the model")
263  if len(self.target_ligandstarget_ligands) == 0:
264  raise ValueError("No ligand in the model and in the target")
265 
266  self.resnum_alignmentsresnum_alignments = resnum_alignments
267  self.rename_ligand_chainrename_ligand_chain = rename_ligand_chain
268  self.substructure_matchsubstructure_match = substructure_match
269  self.coverage_deltacoverage_delta = coverage_delta
270  self.max_symmetriesmax_symmetries = max_symmetries
271 
272  # lazily computed attributes
273  self.__chain_mapper__chain_mapper = None
274 
275  # keep track of states
276  # simple integers instead of enums - encoding available in
277  # self.state_decoding
278  self._state_matrix_state_matrix = None
279  self._model_ligand_states_model_ligand_states = None
280  self._target_ligand_states_target_ligand_states = None
281 
282  # score matrices
283  self._score_matrix_score_matrix = None
284  self._coverage_matrix_coverage_matrix = None
285  self._aux_matrix_aux_matrix = None
286 
287  # assignment and derived data
288  self._assignment_assignment = None
289  self._score_dict_score_dict = None
290  self._aux_dict_aux_dict = None
291 
292  # human readable description of states - child class must extend with
293  # with child class specific states
294  # each state code comes with a tuple of two elements:
295  # 1) short description 2) human readable description
296  # The actual states are set in _compute_scores in :class:`LigandScorer`
297  # or _compute_score of the child class.
298  if self.substructure_matchsubstructure_match:
299  iso = "subgraph isomorphism"
300  else:
301  iso = "full graph isomorphism"
302 
303  self.state_decodingstate_decoding = \
304  {0: ("OK", "OK"),
305  1: ("identity", f"Ligands could not be matched (by {iso})"),
306  2: ("symmetries", "Too many symmetries between ligand atoms were "
307  "found - increasing max_symmetries might help"),
308  3: ("no_iso", "No full isomorphic match could be found - enabling "
309  "substructure_match might allow a match"),
310  4: ("disconnected", "Ligand graph is disconnected"),
311  5: ("stoichiometry", "Ligand was already assigned to another ligand "
312  "(different stoichiometry)"),
313  6: ("single_ligand_issue", "Cannot compute valid pairwise score as "
314  "either model or target ligand have non-zero state."),
315  9: ("unknown", "An unknown error occured in LigandScorer")}
316 
317  @property
318  def state_matrix(self):
319  """ Encodes states of ligand pairs
320 
321  Ligand pairs can be matched and a valid score can be expected if
322  respective location in this matrix is 0.
323  Target ligands are in rows, model ligands in columns. States are encoded
324  as integers <= 9. Larger numbers encode errors for child classes.
325  Use something like ``self.state_decoding[3]`` to get a decscription.
326 
327  :rtype: :class:`~numpy.ndarray`
328  """
329  if self._state_matrix_state_matrix is None:
330  self._compute_scores_compute_scores()
331  return self._state_matrix_state_matrix
332 
333  @property
335  """ Encodes states of model ligands
336 
337  Non-zero state in any of the model ligands invalidates the full
338  respective column in :attr:`~state_matrix`.
339 
340  :rtype: :class:`~numpy.ndarray`
341  """
342  if self._model_ligand_states_model_ligand_states is None:
343  self._compute_scores_compute_scores()
344  return self._model_ligand_states_model_ligand_states
345 
346  @property
348  """ Encodes states of target ligands
349 
350  Non-zero state in any of the target ligands invalidates the full
351  respective row in :attr:`~state_matrix`.
352 
353  :rtype: :class:`~numpy.ndarray`
354  """
355  if self._target_ligand_states_target_ligand_states is None:
356  self._compute_scores_compute_scores()
357  return self._target_ligand_states_target_ligand_states
358 
359  @property
360  def score_matrix(self):
361  """ Get the matrix of scores.
362 
363  Target ligands are in rows, model ligands in columns.
364 
365  NaN values indicate that no value could be computed (i.e. different
366  ligands). In other words: values are only valid if the respective
367  location in :attr:`~state_matrix` is 0.
368 
369  :rtype: :class:`~numpy.ndarray`
370  """
371  if self._score_matrix_score_matrix is None:
372  self._compute_scores_compute_scores()
373  return self._score_matrix_score_matrix
374 
375  @property
376  def coverage_matrix(self):
377  """ Get the matrix of model ligand atom coverage in the target.
378 
379  Target ligands are in rows, model ligands in columns.
380 
381  NaN values indicate that no value could be computed (i.e. different
382  ligands). In other words: values are only valid if the respective
383  location in :attr:`~state_matrix` is 0. If `substructure_match=False`,
384  only full match isomorphisms are considered, and therefore only values
385  of 1.0 can be observed.
386 
387  :rtype: :class:`~numpy.ndarray`
388  """
389  if self._coverage_matrix_coverage_matrix is None:
390  self._compute_scores_compute_scores()
391  return self._coverage_matrix_coverage_matrix
392 
393  @property
394  def aux_matrix(self):
395  """ Get the matrix of scorer specific auxiliary data.
396 
397  Target ligands are in rows, model ligands in columns.
398 
399  Auxiliary data consists of arbitrary data dicts which allow a child
400  class to provide additional information for a scored ligand pair.
401  empty dictionaries indicate that the child class simply didn't return
402  anything or that no value could be computed (e.g. different ligands).
403  In other words: values are only valid if respective location in the
404  :attr:`~state_matrix` is 0.
405 
406  :rtype: :class:`~numpy.ndarray`
407  """
408  if self._aux_matrix_aux_matrix is None:
409  self._compute_scores_compute_scores()
410  return self._aux_matrix_aux_matrix
411 
412  @property
413  def assignment(self):
414  """ Ligand assignment based on computed scores
415 
416  Implements a greedy algorithm to assign target and model ligands
417  with each other. Starts from each valid ligand pair as indicated
418  by a state of 0 in :attr:`state_matrix`. Each iteration first selects
419  high coverage pairs. Given max_coverage defined as the highest
420  coverage observed in the available pairs, all pairs with coverage
421  in [max_coverage-*coverage_delta*, max_coverage] are selected.
422  The best scoring pair among those is added to the assignment
423  and the whole process is repeated until there are no ligands to
424  assign anymore.
425 
426  :rtype: :class:`list` of :class:`tuple` (trg_lig_idx, mdl_lig_idx)
427  """
428  if self._assignment_assignment is None:
429  self._assignment_assignment = list()
430  # Build working array that contains tuples for all mdl/trg ligand
431  # pairs with valid score as indicated by a state of 0:
432  # (score, coverage, trg_ligand_idx, mdl_ligand_idx)
433  tmp = list()
434  for trg_idx in range(self.score_matrixscore_matrix.shape[0]):
435  for mdl_idx in range(self.score_matrixscore_matrix.shape[1]):
436  if self.state_matrixstate_matrix[trg_idx, mdl_idx] == 0:
437  tmp.append((self.score_matrixscore_matrix[trg_idx, mdl_idx],
438  self.coverage_matrixcoverage_matrix[trg_idx, mdl_idx],
439  trg_idx, mdl_idx))
440 
441  # sort by score, such that best scoring item is in front
442  if self._score_dir_score_dir() == '+':
443  tmp.sort(reverse=True)
444  elif self._score_dir_score_dir() == '-':
445  tmp.sort()
446  else:
447  raise RuntimeError("LigandScorer._score_dir must return one in "
448  "['+', '-']")
449 
450  LogScript("Computing ligand assignment")
451  while len(tmp) > 0:
452  # select high coverage ligand pairs in working array
453  coverage_thresh = max([x[1] for x in tmp]) - self.coverage_deltacoverage_delta
454  top_coverage = [x for x in tmp if x[1] >= coverage_thresh]
455 
456  # working array is sorted by score => just pick first one
457  a = top_coverage[0][2] # selected trg_ligand_idx
458  b = top_coverage[0][3] # selected mdl_ligand_idx
459  self._assignment_assignment.append((a, b))
460 
461  # kick out remaining pairs involving these ligands
462  tmp = [x for x in tmp if (x[2] != a and x[3] != b)]
463 
464  return self._assignment_assignment
465 
466  @property
467  def score(self):
468  """ Get a dictionary of score values, keyed by model ligand
469 
470  Extract score with something like:
471  ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
472  The returned scores are based on :attr:`~assignment`.
473 
474  :rtype: :class:`dict`
475  """
476  if self._score_dict_score_dict is None:
477  self._score_dict_score_dict = dict()
478  for (trg_lig_idx, mdl_lig_idx) in self.assignmentassignment:
479  mdl_lig = self.model_ligandsmodel_ligands[mdl_lig_idx]
480  cname = mdl_lig.GetChain().GetName()
481  rnum = mdl_lig.GetNumber()
482  if cname not in self._score_dict_score_dict:
483  self._score_dict_score_dict[cname] = dict()
484  score = self.score_matrixscore_matrix[trg_lig_idx, mdl_lig_idx]
485  self._score_dict_score_dict[cname][rnum] = score
486  return self._score_dict_score_dict
487 
488  @property
489  def aux(self):
490  """ Get a dictionary of score details, keyed by model ligand
491 
492  Extract dict with something like:
493  ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
494  The returned info dicts are based on :attr:`~assignment`. The content is
495  documented in the respective child class.
496 
497  :rtype: :class:`dict`
498  """
499  if self._aux_dict_aux_dict is None:
500  self._aux_dict_aux_dict = dict()
501  for (trg_lig_idx, mdl_lig_idx) in self.assignmentassignment:
502  mdl_lig = self.model_ligandsmodel_ligands[mdl_lig_idx]
503  cname = mdl_lig.GetChain().GetName()
504  rnum = mdl_lig.GetNumber()
505  if cname not in self._aux_dict_aux_dict:
506  self._aux_dict_aux_dict[cname] = dict()
507  d = self.aux_matrixaux_matrix[trg_lig_idx, mdl_lig_idx]
508  self._aux_dict_aux_dict[cname][rnum] = d
509  return self._aux_dict_aux_dict
510 
511  @property
513  """ Get indices of target ligands which are not assigned
514 
515  :rtype: :class:`list` of :class:`int`
516  """
517  # compute on-the-fly, no need for caching
518  assigned = set([x[0] for x in self.assignmentassignment])
519  return [x for x in range(len(self.target_ligandstarget_ligands)) if x not in assigned]
520 
521  @property
523  """ Get indices of model ligands which are not assigned
524 
525  :rtype: :class:`list` of :class:`int`
526  """
527  # compute on-the-fly, no need for caching
528  assigned = set([x[1] for x in self.assignmentassignment])
529  return [x for x in range(len(self.model_ligandsmodel_ligands)) if x not in assigned]
530 
531  def get_target_ligand_state_report(self, trg_lig_idx):
532  """ Get summary of states observed with respect to all model ligands
533 
534  Mainly for debug purposes
535 
536  :param trg_lig_idx: Index of target ligand for which report should be
537  generated
538  :type trg_lig_idx: :class:`int`
539  """
540  return self._get_report_get_report(self.target_ligand_statestarget_ligand_states[trg_lig_idx],
541  self.state_matrixstate_matrix[trg_lig_idx,:])
542 
543  def get_model_ligand_state_report(self, mdl_lig_idx):
544  """ Get summary of states observed with respect to all target ligands
545 
546  Mainly for debug purposes
547 
548  :param mdl_lig_idx: Index of model ligand for which report should be
549  generated
550  :type mdl_lig_idx: :class:`int`
551  """
552  return self._get_report_get_report(self.model_ligand_statesmodel_ligand_states[mdl_lig_idx],
553  self.state_matrixstate_matrix[:, mdl_lig_idx])
554 
555  def _get_report(self, ligand_state, pair_states):
556  """ Helper
557  """
558  pair_report = list()
559  for s in np.unique(pair_states):
560  desc = self.state_decodingstate_decoding[s]
561  indices = np.flatnonzero(pair_states == s).tolist()
562  pair_report.append({"state": s,
563  "short desc": desc[0],
564  "desc": desc[1],
565  "indices": indices})
566 
567  desc = self.state_decodingstate_decoding[ligand_state]
568  ligand_report = {"state": ligand_state,
569  "short desc": desc[0],
570  "desc": desc[1]}
571 
572  return (ligand_report, pair_report)
573 
575  """ Makes an educated guess why target ligand is not assigned
576 
577  This either returns actual error states or custom states that are
578  derived from them. Currently, the following reasons are reported:
579 
580  * `no_ligand`: there was no ligand in the model.
581  * `disconnected`: the ligand graph was disconnected.
582  * `identity`: the ligand was not found in the model (by graph
583  isomorphism). Check your ligand connectivity.
584  * `no_iso`: no full isomorphic match could be found. Try enabling
585  `substructure_match=True` if the target ligand is incomplete.
586  * `symmetries`: too many symmetries were found (by graph isomorphisms).
587  Try to increase `max_symmetries`.
588  * `stoichiometry`: there was a possible assignment in the model, but
589  the model ligand was already assigned to a different target ligand.
590  This indicates different stoichiometries.
591  * `no_contact` (lDDT-PLI only): There were no lDDT contacts between
592  the binding site and the ligand, and lDDT-PLI is undefined.
593  * `target_binding_site` (SCRMSD only): no polymer residues were in
594  proximity of the target ligand.
595  * `model_binding_site` (SCRMSD only): the binding site was not found
596  in the model. Either the binding site was not modeled or the model
597  ligand was positioned too far in combination with
598  `full_bs_search=False`.
599 
600  :param trg_lig_idx: Index of target ligand
601  :type trg_lig_idx: :class:`int`
602  :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
603  sentence describing the issue, (\"unknown\",\"unknown\") if
604  nothing obvious can be found.
605  :raises: :class:`RuntimeError` if specified target ligand is assigned
606  """
607  if trg_lig_idx not in self.unassigned_target_ligandsunassigned_target_ligands:
608  raise RuntimeError("Specified target ligand is not unassigned")
609 
610  # hardcoded tuple if there is simply nothing we can assign it to
611  if len(self.model_ligandsmodel_ligands) == 0:
612  return ("no_ligand", "No ligand in the model")
613 
614  # if something with the ligand itself is wrong, we can be pretty sure
615  # thats why the ligand is unassigned
616  if self.target_ligand_statestarget_ligand_states[trg_lig_idx] != 0:
617  return self.state_decodingstate_decoding[self.target_ligand_statestarget_ligand_states[trg_lig_idx]]
618 
619  # The next best guess comes from looking at pair states
620  tmp = np.unique(self.state_matrixstate_matrix[trg_lig_idx,:])
621 
622  # In case of any 0, it could have been assigned so it's probably
623  # just not selected due to different stoichiometry - this is no
624  # defined state, we just return a hardcoded tuple in this case
625  if 0 in tmp:
626  return ("stoichiometry",
627  "Ligand was already assigned to an other "
628  "model ligand (different stoichiometry)")
629 
630  # maybe it's a symmetry issue?
631  if 2 in tmp:
632  return self.state_decodingstate_decoding[2]
633 
634  # if the state is 6 (single_ligand_issue), there is an issue with its
635  # target counterpart.
636  if 6 in tmp:
637  mdl_idx = np.where(self.state_matrixstate_matrix[trg_lig_idx,:]==6)[0]
638  for i in mdl_idx:
639  if self.model_ligand_statesmodel_ligand_states[i] == 0:
640  raise RuntimeError("This should never happen")
641  if self.model_ligand_statesmodel_ligand_states[i] != 4 or len(tmp) == 1:
642  # Don't report disconnected if only 1 model ligand is
643  # disconnected, unless that's the only reason
644  return self.state_decodingstate_decoding[self.model_ligand_statesmodel_ligand_states[i]]
645 
646  # get rid of remaining single ligand issues (only disconnected error)
647  if 6 in tmp and len(tmp) > 1:
648  tmp = tmp[tmp!=6]
649 
650  # prefer everything over identity state
651  if 1 in tmp and len(tmp) > 1:
652  tmp = tmp[tmp!=1]
653 
654  # just return whatever is left
655  return self.state_decodingstate_decoding[tmp[0]]
656 
657 
658  def guess_model_ligand_unassigned_reason(self, mdl_lig_idx):
659  """ Makes an educated guess why model ligand is not assigned
660 
661  This either returns actual error states or custom states that are
662  derived from them. Currently, the following reasons are reported:
663 
664  * `no_ligand`: there was no ligand in the target.
665  * `disconnected`: the ligand graph is disconnected.
666  * `identity`: the ligand was not found in the target (by graph or
667  subgraph isomorphism). Check your ligand connectivity.
668  * `no_iso`: no full isomorphic match could be found. Try enabling
669  `substructure_match=True` if the target ligand is incomplete.
670  * `symmetries`: too many symmetries were found (by graph isomorphisms).
671  Try to increase `max_symmetries`.
672  * `stoichiometry`: there was a possible assignment in the target, but
673  the model target was already assigned to a different model ligand.
674  This indicates different stoichiometries.
675  * `no_contact` (lDDT-PLI only): There were no lDDT contacts between
676  the binding site and the ligand, and lDDT-PLI is undefined.
677  * `target_binding_site` (SCRMSD only): a potential assignment was found
678  in the target, but there were no polymer residues in proximity of the
679  ligand in the target.
680  * `model_binding_site` (SCRMSD only): a potential assignment was
681  found in the target, but no binding site was found in the model.
682  Either the binding site was not modeled or the model ligand was
683  positioned too far in combination with `full_bs_search=False`.
684 
685  :param mdl_lig_idx: Index of model ligand
686  :type mdl_lig_idx: :class:`int`
687  :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
688  sentence describing the issue, (\"unknown\",\"unknown\") if
689  nothing obvious can be found.
690  :raises: :class:`RuntimeError` if specified model ligand is assigned
691  """
692  if mdl_lig_idx not in self.unassigned_model_ligandsunassigned_model_ligands:
693  raise RuntimeError("Specified model ligand is not unassigned")
694 
695  # hardcoded tuple if there is simply nothing we can assign it to
696  if len(self.target_ligandstarget_ligands) == 0:
697  return ("no_ligand", "No ligand in the target")
698 
699  # if something with the ligand itself is wrong, we can be pretty sure
700  # thats why the ligand is unassigned
701  if self.model_ligand_statesmodel_ligand_states[mdl_lig_idx] != 0:
702  return self.state_decodingstate_decoding[self.model_ligand_statesmodel_ligand_states[mdl_lig_idx]]
703 
704  # The next best guess comes from looking at pair states
705  tmp = np.unique(self.state_matrixstate_matrix[:,mdl_lig_idx])
706 
707  # In case of any 0, it could have been assigned so it's probably
708  # just not selected due to different stoichiometry - this is no
709  # defined state, we just return a hardcoded tuple in this case
710  if 0 in tmp:
711  return ("stoichiometry",
712  "Ligand was already assigned to an other "
713  "model ligand (different stoichiometry)")
714 
715  # maybe its a symmetry issue?
716  if 2 in tmp:
717  return self.state_decodingstate_decoding[2]
718 
719  # if the state is 6 (single_ligand_issue), there is an issue with its
720  # target counterpart.
721  if 6 in tmp:
722  trg_idx = np.where(self.state_matrixstate_matrix[:,mdl_lig_idx]==6)[0]
723  for i in trg_idx:
724  if self.target_ligand_statestarget_ligand_states[i] == 0:
725  raise RuntimeError("This should never happen")
726  if self.target_ligand_statestarget_ligand_states[i] != 4 or len(tmp) == 1:
727  # Don't report disconnected if only 1 target ligand is
728  # disconnected, unless that's the only reason
729  return self.state_decodingstate_decoding[self.target_ligand_statestarget_ligand_states[i]]
730 
731  # get rid of remaining single ligand issues (only disconnected error)
732  if 6 in tmp and len(tmp) > 1:
733  tmp = tmp[tmp!=6]
734 
735  # prefer everything over identity state
736  if 1 in tmp and len(tmp) > 1:
737  tmp = tmp[tmp!=1]
738 
739  # just return whatever is left
740  return self.state_decodingstate_decoding[tmp[0]]
741 
742  @property
744  return_dict = dict()
745  for i in self.unassigned_model_ligandsunassigned_model_ligands:
746  lig = self.model_ligandsmodel_ligands[i]
747  cname = lig.GetChain().GetName()
748  rnum = lig.GetNumber()
749  if cname not in return_dict:
750  return_dict[cname] = dict()
751  return_dict[cname][rnum] = \
752  self.guess_model_ligand_unassigned_reasonguess_model_ligand_unassigned_reason(i)[0]
753  return return_dict
754 
755  @property
757  return_dict = dict()
758  for i in self.unassigned_target_ligandsunassigned_target_ligands:
759  lig = self.target_ligandstarget_ligands[i]
760  cname = lig.GetChain().GetName()
761  rnum = lig.GetNumber()
762  if cname not in return_dict:
763  return_dict[cname] = dict()
764  return_dict[cname][rnum] = \
765  self.guess_target_ligand_unassigned_reasonguess_target_ligand_unassigned_reason(i)[0]
766  return return_dict
767 
768  @property
769  def _chain_mapper(self):
770  """ Chain mapper object for the given :attr:`target`.
771 
772  Can be used by child classes if needed, constructed with
773  *resnum_alignments* flag
774 
775  :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
776  """
777  if self.__chain_mapper__chain_mapper is None:
778  with _SinkVerbosityLevel():
779  self.__chain_mapper__chain_mapper = \
781  n_max_naive=1e9,
782  resnum_alignments=self.resnum_alignmentsresnum_alignments)
783  return self.__chain_mapper__chain_mapper
784 
785  @staticmethod
786  def _extract_ligands(entity):
787  """Extract ligands from entity. Return a list of residues.
788 
789  Assumes that ligands have the :attr:`~ost.mol.ResidueHandle.is_ligand`
790  flag set. This is typically the case for entities loaded from mmCIF
791  (tested with mmCIF files from the PDB and SWISS-MODEL).
792  Legacy PDB files must contain `HET` headers (which is usually the
793  case for files downloaded from the PDB but not elsewhere).
794 
795  This function performs basic checks to ensure that the residues in this
796  chain are not forming polymer bonds (ie peptide/nucleotide ligands) and
797  will raise a RuntimeError if this assumption is broken.
798 
799  :param entity: the entity to extract ligands from
800  :type entity: :class:`~ost.mol.EntityHandle`
801  :rtype: :class:`list` of :class:`~ost.mol.ResidueHandle`
802 
803  """
804  extracted_ligands = []
805  for residue in entity.residues:
806  if residue.is_ligand:
807  if mol.InSequence(residue, residue.next):
808  raise RuntimeError("Residue %s connected in polymer sequen"
809  "ce %s" % (residue.qualified_name))
810  extracted_ligands.append(residue)
811  LogVerbose("Detected residue %s as ligand" % residue)
812  return extracted_ligands
813 
814  @staticmethod
815  def _prepare_ligands(new_entity, old_entity, ligands, rename_chain):
816  """Prepare the ligands given into a list of ResidueHandles which are
817  part of the copied entity, suitable for the model_ligands and
818  target_ligands properties.
819 
820  This function takes a list of ligands as (Entity|Residue)(Handle|View).
821  Entities can contain multiple ligands, which will be considered as
822  separate ligands.
823 
824  Ligands which are part of the entity are simply fetched in the new
825  copied entity. Otherwise, they are copied over to the copied entity.
826  """
827  extracted_ligands = []
828 
829  next_chain_num = 1
830  new_editor = None
831 
832  def _copy_residue(residue, rename_chain):
833  """ Copy the residue into the new chain.
834  Return the new residue handle."""
835  nonlocal next_chain_num, new_editor
836 
837  # Instantiate the editor
838  if new_editor is None:
839  new_editor = new_entity.EditXCS(mol.BUFFERED_EDIT)
840 
841  new_chain = new_entity.FindChain(residue.chain.name)
842  if not new_chain.IsValid():
843  new_chain = new_editor.InsertChain(residue.chain.name)
844  new_editor.SetChainType(new_chain,
845  mol.ChainType.CHAINTYPE_NON_POLY)
846  else:
847  # Does a residue with the same name already exist?
848  already_exists = new_chain.FindResidue(residue.number).IsValid()
849  if already_exists:
850  if rename_chain:
851  chain_ext = 2 # Extend the chain name by this
852  while True:
853  new_chain_name = \
854  residue.chain.name + "_" + str(chain_ext)
855  new_chain = new_entity.FindChain(new_chain_name)
856  if new_chain.IsValid():
857  chain_ext += 1
858  continue
859  else:
860  new_chain = \
861  new_editor.InsertChain(new_chain_name)
862  break
863  LogInfo("Moved ligand residue %s to new chain %s" % (
864  residue.qualified_name, new_chain.name))
865  else:
866  msg = \
867  "A residue number %s already exists in chain %s" % (
868  residue.number, residue.chain.name)
869  raise RuntimeError(msg)
870 
871  # Add the residue with its original residue number
872  new_res = new_editor.AppendResidue(new_chain, residue.name,
873  residue.number)
874  # Add atoms
875  for old_atom in residue.atoms:
876  new_editor.InsertAtom(new_res, old_atom.name, old_atom.pos,
877  element=old_atom.element, occupancy=old_atom.occupancy,
878  b_factor=old_atom.b_factor, is_hetatm=old_atom.is_hetatom)
879  # Add bonds
880  for old_atom in residue.atoms:
881  for old_bond in old_atom.bonds:
882  new_first = new_res.FindAtom(old_bond.first.name)
883  new_second = new_res.FindAtom(old_bond.second.name)
884  new_editor.Connect(new_first, new_second)
885  return new_res
886 
887  def _process_ligand_residue(res, rename_chain):
888  """Copy or fetch the residue. Return the residue handle."""
889  new_res = None
890  if res.entity.handle == old_entity.handle:
891  # Residue is part of the old_entity handle.
892  # However, it may not be in the copied one, for instance it may
893  # have been a view We try to grab it first, otherwise we copy it
894  new_res = new_entity.FindResidue(res.chain.name, res.number)
895  if new_res and new_res.valid:
896  qname = res.handle.qualified_name
897  LogVerbose("Ligand residue %s already in entity" % qname)
898  else:
899  # Residue is not part of the entity, need to copy it first
900  new_res = _copy_residue(res, rename_chain)
901  qname = res.handle.qualified_name
902  LogVerbose("Copied ligand residue %s" % qname)
903  new_res.SetIsLigand(True)
904  return new_res
905 
906  for ligand in ligands:
907  is_eh = isinstance(ligand, mol.EntityHandle)
908  is_ev = isinstance(ligand, mol.EntityView)
909  is_rh = isinstance(ligand, mol.ResidueHandle)
910  is_rv = isinstance(ligand, mol.ResidueView)
911  if is_eh or is_ev:
912  for residue in ligand.residues:
913  new_residue = _process_ligand_residue(residue, rename_chain)
914  extracted_ligands.append(new_residue)
915  elif is_rh or is_rv:
916  new_residue = _process_ligand_residue(ligand, rename_chain)
917  extracted_ligands.append(new_residue)
918  else:
919  raise RuntimeError("Ligands should be given as Entity or "
920  "Residue")
921 
922  if new_editor is not None:
923  new_editor.UpdateICS()
924  return extracted_ligands
925 
926  def _compute_scores(self):
927  """
928  Compute score for every possible target-model ligand pair and store the
929  result in internal matrices.
930  """
931 
934  shape = (len(self.target_ligandstarget_ligands), len(self.model_ligandsmodel_ligands))
935  self._score_matrix_score_matrix = np.full(shape, np.nan, dtype=np.float32)
936  self._coverage_matrix_coverage_matrix = np.full(shape, np.nan, dtype=np.float32)
937  self._state_matrix_state_matrix = np.full(shape, 0, dtype=np.int32)
938  self._model_ligand_states_model_ligand_states = np.zeros(len(self.model_ligandsmodel_ligands))
939  self._target_ligand_states_target_ligand_states = np.zeros(len(self.target_ligandstarget_ligands))
940  self._aux_matrix_aux_matrix = np.empty(shape, dtype=dict)
941 
942  # precompute ligand graphs
943  target_graphs = \
944  [_ResidueToGraph(l, by_atom_index=True) for l in self.target_ligandstarget_ligands]
945  model_graphs = \
946  [_ResidueToGraph(l, by_atom_index=True) for l in self.model_ligandsmodel_ligands]
947 
948  for g_idx, g in enumerate(target_graphs):
949  if not networkx.is_connected(g):
950  self._target_ligand_states_target_ligand_states[g_idx] = 4
951  self._state_matrix_state_matrix[g_idx,:] = 6
952  msg = "Disconnected graph observed for target ligand "
953  msg += str(self.target_ligandstarget_ligands[g_idx])
954  LogWarning(msg)
955 
956  for g_idx, g in enumerate(model_graphs):
957  if not networkx.is_connected(g):
958  self._model_ligand_states_model_ligand_states[g_idx] = 4
959  self._state_matrix_state_matrix[:,g_idx] = 6
960  msg = "Disconnected graph observed for model ligand "
961  msg += str(self.model_ligandsmodel_ligands[g_idx])
962  LogWarning(msg)
963 
964 
965  LogScript("Computing pairwise scores for all %s x %s ligands" % shape)
966  for target_id, target_ligand in enumerate(self.target_ligandstarget_ligands):
967  LogInfo("Analyzing target ligand %s" % target_ligand)
968 
969  if self._target_ligand_states_target_ligand_states[target_id] == 4:
970  # Disconnected graph - already updated states and reported
971  # to LogVerbose
972  continue
973 
974  for model_id, model_ligand in enumerate(self.model_ligandsmodel_ligands):
975  LogInfo("Comparing to model ligand %s" % model_ligand)
976 
977 
980 
981  if self._model_ligand_states_model_ligand_states[model_id] == 4:
982  # Disconnected graph - already updated states and reported
983  # to LogVerbose
984  continue
985 
986  try:
987  symmetries = ComputeSymmetries(
988  model_ligand, target_ligand,
989  substructure_match=self.substructure_matchsubstructure_match,
990  by_atom_index=True,
991  max_symmetries=self.max_symmetriesmax_symmetries,
992  model_graph = model_graphs[model_id],
993  target_graph = target_graphs[target_id])
994  LogInfo("Ligands %s and %s symmetry match" % (
995  str(model_ligand), str(target_ligand)))
996  except NoSymmetryError:
997  # Ligands are different - skip
998  LogInfo("No symmetry between %s and %s" % (
999  str(model_ligand), str(target_ligand)))
1000  self._state_matrix_state_matrix[target_id, model_id] = 1
1001  continue
1002  except TooManySymmetriesError:
1003  # Ligands are too symmetrical - skip
1004  LogWarning("Too many symmetries between %s and %s" % (
1005  str(model_ligand), str(target_ligand)))
1006  self._state_matrix_state_matrix[target_id, model_id] = 2
1007  continue
1008  except NoIsomorphicSymmetryError:
1009  # Ligands are different - skip
1010  LogInfo("No isomorphic symmetry between %s and %s" % (
1011  str(model_ligand), str(target_ligand)))
1012  self._state_matrix_state_matrix[target_id, model_id] = 3
1013  continue
1014  except DisconnectedGraphError:
1015  # this should never happen as we guard against
1016  # DisconnectedGraphError when precomputing the graph
1017  LogError("Disconnected graph observed for %s and %s" % (
1018  str(model_ligand), str(target_ligand)))
1019  # just set both ligand states to 4
1020  self._model_ligand_states_model_ligand_states[model_id] = 4
1021  self._model_ligand_states_model_ligand_states[target_id] = 4
1022  self._state_matrix_state_matrix[target_id, model_id] = 6
1023  continue
1024 
1025 
1028  score, pair_state, target_ligand_state, model_ligand_state, aux\
1029  = self._compute_compute(symmetries, target_ligand, model_ligand)
1030 
1031 
1034 
1035  # Ensure that returned states are associated with a
1036  # description. This is a requirement when subclassing
1037  # LigandScorer => state_decoding dict from base class must
1038  # be modified in subclass constructor
1039  if pair_state not in self.state_decodingstate_decoding or \
1040  target_ligand_state not in self.state_decodingstate_decoding or \
1041  model_ligand_state not in self.state_decodingstate_decoding:
1042  raise RuntimeError(f"Subclass returned state "
1043  f"\"{state}\" for which no "
1044  f"description is available. Point "
1045  f"the developer of the used scorer "
1046  f"to this error message.")
1047 
1048  # if any of the ligand states is non-zero, this must be marked
1049  # by the scorer subclass by specifying a specific pair state
1050  if target_ligand_state != 0 and pair_state != 6:
1051  raise RuntimeError("Observed non-zero target-ligand "
1052  "state which must trigger certain "
1053  "pair state. Point the developer of "
1054  "the used scorer to this error message")
1055 
1056  if model_ligand_state != 0 and pair_state != 6:
1057  raise RuntimeError("Observed non-zero model-ligand "
1058  "state which must trigger certain "
1059  "pair state. Point the developer of "
1060  "the used scorer to this error message")
1061 
1062  self._state_matrix_state_matrix[target_id, model_id] = pair_state
1063  self._target_ligand_states_target_ligand_states[target_id] = target_ligand_state
1064  self._model_ligand_states_model_ligand_states[model_id] = model_ligand_state
1065  if pair_state == 0:
1066  if score is None or np.isnan(score):
1067  raise RuntimeError("LigandScorer returned invalid "
1068  "score despite 0 error state")
1069  # it's a valid score!
1070  self._score_matrix_score_matrix[target_id, model_id] = score
1071  cvg = len(symmetries[0][0]) / len(model_ligand.atoms)
1072  self._coverage_matrix_coverage_matrix[target_id, model_id] = cvg
1073  self._aux_matrix_aux_matrix[target_id, model_id] = aux
1074 
1075  def _compute(self, symmetries, target_ligand, model_ligand):
1076  """ Compute score for specified ligand pair - defined by child class
1077 
1078  Raises :class:`NotImplementedError` if not implemented by child class.
1079 
1080  :param symmetries: Defines symmetries between *target_ligand* and
1081  *model_ligand*. Return value of
1082  :func:`ComputeSymmetries`
1083  :type symmetries: :class:`list` of :class:`tuple` with two elements
1084  each: 1) :class:`list` of atom indices in
1085  *target_ligand* 2) :class:`list` of respective atom
1086  indices in *model_ligand*
1087  :param target_ligand: The target ligand
1088  :type target_ligand: :class:`ost.mol.ResidueHandle` or
1089  :class:`ost.mol.ResidueView`
1090  :param model_ligand: The model ligand
1091  :type model_ligand: :class:`ost.mol.ResidueHandle` or
1092  :class:`ost.mol.ResidueView`
1093 
1094  :returns: A :class:`tuple` with three elements: 1) a score
1095  (:class:`float`) 2) state (:class:`int`).
1096  3) auxiliary data for this ligand pair (:class:`dict`).
1097  If state is 0, the score and auxiliary data will be
1098  added to :attr:`~score_matrix` and :attr:`~aux_matrix` as well
1099  as the respective value in :attr:`~coverage_matrix`.
1100  Returned score must be valid in this case (not None/NaN).
1101  Child specific non-zero states must be >= 10.
1102  """
1103  raise NotImplementedError("_compute must be implemented by child "
1104  "class")
1105 
1106  def _score_dir(self):
1107  """ Return direction of score - defined by child class
1108 
1109  Relevant for ligand assignment. Must return a string in ['+', '-'].
1110  '+' for ascending scores, i.e. higher is better (lddt etc.)
1111  '-' for descending scores, i.e. lower is better (rmsd etc.)
1112  """
1113  raise NotImplementedError("_score_dir must be implemented by child "
1114  "class")
1115 
1116 
1117 def _ResidueToGraph(residue, by_atom_index=False):
1118  """Return a NetworkX graph representation of the residue.
1119 
1120  :param residue: the residue from which to derive the graph
1121  :type residue: :class:`ost.mol.ResidueHandle` or
1122  :class:`ost.mol.ResidueView`
1123  :param by_atom_index: Set this parameter to True if you need the nodes to
1124  be labeled by atom index (within the residue).
1125  Otherwise, if False, the nodes will be labeled by
1126  atom names.
1127  :type by_atom_index: :class:`bool`
1128  :rtype: :class:`~networkx.classes.graph.Graph`
1129 
1130  Nodes are labeled with the Atom's uppercase
1131  :attr:`~ost.mol.AtomHandle.element`.
1132  """
1133  nxg = networkx.Graph()
1134 
1135  for atom in residue.atoms:
1136  nxg.add_node(atom.name, element=atom.element.upper())
1137 
1138  # This will list all edges twice - once for every atom of the pair.
1139  # But as of NetworkX 3.0 adding the same edge twice has no effect,
1140  # so we're good.
1141  nxg.add_edges_from([(
1142  b.first.name,
1143  b.second.name) for a in residue.atoms for b in a.GetBondList()])
1144 
1145  if by_atom_index:
1146  nxg = networkx.relabel_nodes(nxg,
1147  {a: b for a, b in zip(
1148  [a.name for a in residue.atoms],
1149  range(len(residue.atoms)))},
1150  True)
1151  return nxg
1152 
1153 def ComputeSymmetries(model_ligand, target_ligand, substructure_match=False,
1154  by_atom_index=False, return_symmetries=True,
1155  max_symmetries=1e6, model_graph = None,
1156  target_graph = None):
1157  """Return a list of symmetries (isomorphisms) of the model onto the target
1158  residues.
1159 
1160  :param model_ligand: The model ligand
1161  :type model_ligand: :class:`ost.mol.ResidueHandle` or
1162  :class:`ost.mol.ResidueView`
1163  :param target_ligand: The target ligand
1164  :type target_ligand: :class:`ost.mol.ResidueHandle` or
1165  :class:`ost.mol.ResidueView`
1166  :param substructure_match: Set this to True to allow partial ligands
1167  in the reference.
1168  :type substructure_match: :class:`bool`
1169  :param by_atom_index: Set this parameter to True if you need the symmetries
1170  to refer to atom index (within the residue).
1171  Otherwise, if False, the symmetries refer to atom
1172  names.
1173  :type by_atom_index: :class:`bool`
1174  :type return_symmetries: If Truthy, return the mappings, otherwise simply
1175  return True if a mapping is found (and raise if
1176  no mapping is found). This is useful to quickly
1177  find out if a mapping exist without the expensive
1178  step to find all the mappings.
1179  :type return_symmetries: :class:`bool`
1180  :param max_symmetries: If more than that many isomorphisms exist, raise
1181  a :class:`TooManySymmetriesError`. This can only be assessed by
1182  generating at least that many isomorphisms and can take some time.
1183  :type max_symmetries: :class:`int`
1184  :raises: :class:`NoSymmetryError` when no symmetry can be found;
1185  :class:`NoIsomorphicSymmetryError` in case of isomorphic
1186  subgraph but *substructure_match* is False;
1187  :class:`TooManySymmetriesError` when more than `max_symmetries`
1188  isomorphisms are found; :class:`DisconnectedGraphError` if
1189  graph for *model_ligand*/*target_ligand* is disconnected.
1190  """
1191 
1192  # Get the Graphs of the ligands
1193  if model_graph is None:
1194  model_graph = _ResidueToGraph(model_ligand,
1195  by_atom_index=by_atom_index)
1196 
1197  if not networkx.is_connected(model_graph):
1198  msg = "Disconnected graph for model ligand %s" % model_ligand
1199  raise DisconnectedGraphError(msg)
1200 
1201 
1202  if target_graph is None:
1203  target_graph = _ResidueToGraph(target_ligand,
1204  by_atom_index=by_atom_index)
1205 
1206  if not networkx.is_connected(target_graph):
1207  msg = "Disconnected graph for target ligand %s" % target_ligand
1208  raise DisconnectedGraphError(msg)
1209 
1210  # Note the argument order (model, target) which differs from spyrmsd.
1211  # This is because a subgraph of model is isomorphic to target - but not the
1212  # opposite as we only consider partial ligands in the reference.
1213  # Make sure to generate the symmetries correctly in the end
1214  gm = networkx.algorithms.isomorphism.GraphMatcher(
1215  model_graph, target_graph, node_match=lambda x, y:
1216  x["element"] == y["element"])
1217  if gm.is_isomorphic():
1218  if not return_symmetries:
1219  return True
1220  symmetries = []
1221  for i, isomorphism in enumerate(gm.isomorphisms_iter()):
1222  if i >= max_symmetries:
1223  raise TooManySymmetriesError(
1224  "Too many symmetries between %s and %s" % (
1225  str(model_ligand), str(target_ligand)))
1226  symmetries.append((list(isomorphism.values()),
1227  list(isomorphism.keys())))
1228  assert len(symmetries) > 0
1229  LogVerbose("Found %s isomorphic mappings (symmetries)" % len(symmetries))
1230  elif gm.subgraph_is_isomorphic() and substructure_match:
1231  if not return_symmetries:
1232  return True
1233  symmetries = []
1234  for i, isomorphism in enumerate(gm.subgraph_isomorphisms_iter()):
1235  if i >= max_symmetries:
1236  raise TooManySymmetriesError(
1237  "Too many symmetries between %s and %s" % (
1238  str(model_ligand), str(target_ligand)))
1239  symmetries.append((list(isomorphism.values()),
1240  list(isomorphism.keys())))
1241  assert len(symmetries) > 0
1242  # Assert that all the atoms in the target are part of the substructure
1243  assert len(symmetries[0][0]) == len(target_ligand.atoms)
1244  n_sym = len(symmetries)
1245  LogVerbose("Found %s subgraph isomorphisms (symmetries)" % n_sym)
1246  elif gm.subgraph_is_isomorphic():
1247  LogVerbose("Found subgraph isomorphisms (symmetries), but"
1248  " ignoring because substructure_match=False")
1249  raise NoIsomorphicSymmetryError("No symmetry between %s and %s" % (
1250  str(model_ligand), str(target_ligand)))
1251  else:
1252  LogVerbose("Found no isomorphic mappings (symmetries)")
1253  raise NoSymmetryError("No symmetry between %s and %s" % (
1254  str(model_ligand), str(target_ligand)))
1255 
1256  return symmetries
1257 
1258 class NoSymmetryError(ValueError):
1259  """ Exception raised when no symmetry can be found.
1260  """
1261  pass
1262 
1263 class NoIsomorphicSymmetryError(ValueError):
1264  """ Exception raised when no isomorphic symmetry can be found
1265 
1266  There would be isomorphic subgraphs for which symmetries can
1267  be found, but substructure_match is disabled
1268  """
1269  pass
1270 
1271 class TooManySymmetriesError(ValueError):
1272  """ Exception raised when too many symmetries are found.
1273  """
1274  pass
1275 
1276 class DisconnectedGraphError(Exception):
1277  """ Exception raised when the ligand graph is disconnected.
1278  """
1279  pass
1280 
1281 # specify public interface
1282 __all__ = ('LigandScorer', 'ComputeSymmetries', 'NoSymmetryError',
1283  'NoIsomorphicSymmetryError', 'TooManySymmetriesError',
1284  'DisconnectedGraphError')
Protein or molecule.
definition of EntityView
Definition: entity_view.hh:86
def __init__(self, model, target, model_ligands=None, target_ligands=None, resnum_alignments=False, rename_ligand_chain=False, substructure_match=False, coverage_delta=0.2, max_symmetries=1e5)
def _compute(self, symmetries, target_ligand, model_ligand)
def _prepare_ligands(new_entity, old_entity, ligands, rename_chain)
def _get_report(self, ligand_state, pair_states)
def PopVerbosityLevel()
Definition: __init__.py:232
def PushVerbosityLevel(value)
Definition: __init__.py:229
def ComputeSymmetries(model_ligand, target_ligand, substructure_match=False, by_atom_index=False, return_symmetries=True, max_symmetries=1e6, model_graph=None, target_graph=None)