OpenStructure
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
ligand_scoring_base.py
Go to the documentation of this file.
1 from contextlib import contextmanager
2 import numpy as np
3 import networkx
4 
5 import ost
6 from ost import io
7 from ost import mol
8 from ost import conop
9 from ost import LogWarning, LogScript, LogInfo, LogVerbose, LogDebug, GetVerbosityLevel, PushVerbosityLevel, PopVerbosityLevel
10 from ost.mol.alg import chain_mapping
11 
12 
13 @contextmanager
14 def _SinkVerbosityLevel(n=1):
15  """ Context manager to temporarily reduce the verbosity level by n.
16 
17  Example usage:
18  with _SinkVerbosityLevel(2):
19  LogVerbose("Test")
20  Will only write "Test" in script level (2 above)
21  """
22  PushVerbosityLevel(GetVerbosityLevel() - n)
23  try:
24  yield
25  except:
26  raise
27  finally:
29 
30 
31 def _QualifiedAtomNotation(a):
32  """Return a parsable string of the atom in the format:
33  ChainName.ResidueNumber.InsertionCode.AtomName."""
34  resnum = a.residue.number
35  return "{cname}.{rnum}.{ins_code}.{aname}".format(
36  cname=a.chain.name,
37  rnum=resnum.num,
38  ins_code=resnum.ins_code.strip("\u0000"),
39  aname=a.name,
40  )
41 
42 
43 def _QualifiedResidueNotation(r):
44  """Return a parsable string of the residue in the format:
45  ChainName.ResidueNumber.InsertionCode."""
46  resnum = r.number
47  return "{cname}.{rnum}.{ins_code}".format(
48  cname=r.chain.name,
49  rnum=resnum.num,
50  ins_code=resnum.ins_code.strip("\u0000"),
51  )
52 
53 
54 def CleanHydrogens(ent, clib):
55  """ Ligand scoring helper - Returns copy of *ent* without hydrogens
56 
57  Non-standard hydrogen naming can cause trouble in residue property
58  assignment which is done by the :class:`ost.conop.RuleBasedProcessor` when
59  loading. In fact, residue property assignment is not done for every residue
60  that has unknown atoms according to the chemical component dictionary. This
61  function therefore re-processes the entity after removing hydrogens.
62 
63  :param ent: Entity to clean
64  :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
65  :param clib: Compound library to perform re-processing after hydrogen
66  removal.
67  :type clib: :class:`ost.conop.CompoundLib`
68  :returns: Cleaned and re-processed ent
69  """
70  cleaned_ent = mol.CreateEntityFromView(ent.Select(
71  "ele != H and ele != D"), include_exlusive_atoms=False)
72  # process again to set missing residue properties due to non standard
73  # hydrogens
74  processor = conop.RuleBasedProcessor(clib)
75  processor.Process(cleaned_ent)
76  return cleaned_ent
77 
78 
79 def MMCIFPrep(mmcif_path, biounit=None, extract_nonpoly=False,
80  fault_tolerant=False):
81  """ Ligand scoring helper - Prepares :class:`LigandScorer` input from mmCIF
82 
83  Only performs gentle cleanup of hydrogen atoms. Further cleanup is delegated
84  to :class:`LigandScorer`.
85 
86  :param mmcif_path: Path to mmCIF file that contains polymer and optionally
87  non-polymer entities
88  :type mmcif_path: :class:`str`
89  :param biounit: If given, construct specified biounit from mmCIF AU
90  :type biounit: :class:`str`
91  :param extract_nonpoly: Additionally returns a list of
92  :class:`ost.mol.EntityHandle`
93  objects representing all non-polymer (ligand)
94  entities.
95  :type extract_nonpoly: :class:`bool`
96  :param fault_tolerant: Passed as parameter to :func:`ost.io.LoadMMCIF`
97  :type fault_tolerant: :class:`bool`
98  :returns: :class:`ost.mol.EntityHandle` which only contains polymer
99  entities representing the receptor structure. If *extract_nonpoly*
100  is True, a tuple is returned which additionally contains a
101  :class:`list` of :class:`ost.mol.EntityHandle`, where each
102  :class:`ost.mol.EntityHandle` represents a non-polymer (ligand).
103  """
104  clib = conop.GetDefaultLib()
105  if not clib:
106  ost.LogError("A compound library is required. "
107  "Please refer to the OpenStructure website: "
108  "https://openstructure.org/docs/conop/compoundlib/.")
109  raise RuntimeError("No compound library found")
110 
111  mmcif_entity, mmcif_info = io.LoadMMCIF(mmcif_path, info=True,
112  fault_tolerant=fault_tolerant)
113  mmcif_entity = CleanHydrogens(mmcif_entity, clib)
114 
115  # get AU chain names representing polymer entities
116  polymer_entity_ids = mmcif_info.GetEntityIdsOfType("polymer")
117  polymer_chain_names = list()
118  for ch in mmcif_entity.chains:
119  if mmcif_info.GetMMCifEntityIdTr(ch.name) in polymer_entity_ids:
120  polymer_chain_names.append(ch.name)
121 
122  # get AU chain names representing non-polymer entities
123  non_polymer_entity_ids = mmcif_info.GetEntityIdsOfType("non-polymer")
124  non_polymer_chain_names = list()
125  for ch in mmcif_entity.chains:
126  if mmcif_info.GetMMCifEntityIdTr(ch.name) in non_polymer_entity_ids:
127  non_polymer_chain_names.append(ch.name)
128 
129  # construct biounit if necessary
130  if biounit is not None:
131  biounit_found = False
132  for bu in mmcif_info.biounits:
133  if bu.id == biounit:
134  mmcif_entity = mol.alg.CreateBU(mmcif_entity, bu)
135  biounit_found = True
136  break
137  if not biounit_found:
138  raise RuntimeError(f"Specified biounit '{biounit}' not in "
139  f"{mmcif_path}")
140 
141  # assign generic properties for selection later on
142  non_poly_id = 0
143  for ch in mmcif_entity.chains:
144  cname = None
145  if biounit is not None:
146  # if a biounit is constructed, you get chain names like: 1.YOLO
147  # we cannot simply split by '.' since '.' is an allowed character
148  # in chain names. => split by first occurence
149  dot_index = ch.name.find('.')
150  if dot_index == -1:
151  cname = ch.name
152  else:
153  cname = ch.name[dot_index+1:]
154  else:
155  cname = ch.name
156 
157  if cname in polymer_chain_names:
158  ch.SetIntProp("poly", 1)
159  if cname in non_polymer_chain_names:
160  ch.SetIntProp("nonpolyid", non_poly_id)
161  non_poly_id += 1
162 
163  poly_sel = mmcif_entity.Select("gcpoly:0=1")
164  poly_ent = mol.CreateEntityFromView(poly_sel, True)
165 
166  if extract_nonpoly == False:
167  return poly_ent
168 
169  non_poly_sel = mmcif_entity.Select("gcnonpoly:0=1")
170  non_poly_entities = list()
171  for i in range(non_poly_id):
172  view = mmcif_entity.Select(f"gcnonpolyid:{non_poly_id}={i}")
173  if view.GetResidueCount() != 1:
174  raise RuntimeError(f"Expect non-polymer entities in "
175  f"{mmcif_path} to contain exactly 1 "
176  f"residue. Got {ch.GetResidueCount()} "
177  f"in chain {ch.name}")
178  compound = clib.FindCompound(view.residues[0].name)
179  if compound is None:
180  raise RuntimeError(f"Can only extract non-polymer entities if "
181  f"respective residues are available in PDB "
182  f"component dictionary. Can't find "
183  f"\"{view.residues[0].name}\"")
184 
185  non_poly_entities.append(mol.CreateEntityFromView(view, True))
186 
187  return (poly_ent, non_poly_entities)
188 
189 
190 def PDBPrep(pdb_path, fault_tolerant=False):
191  """ Ligand scoring helper - Prepares :class:`LigandScorer` input from PDB
192 
193  Only performs gentle cleanup of hydrogen atoms. Further cleanup is delegated
194  to :class:`LigandScorer`. There is no logic to extract ligands from PDB
195  files. Ligands must be provided separately as SDF files in these cases.
196 
197  :param pdb_path: Path to PDB file that contains polymer entities
198  :type pdb_path: :class:`str`
199  :param fault_tolerant: Passed as parameter to :func:`ost.io.LoadPDB`
200  :type fault_tolerant: :class:`bool`
201  :returns: :class:`EntityHandle` from loaded file.
202  """
203  clib = conop.GetDefaultLib()
204  if not clib:
205  ost.LogError("A compound library is required. "
206  "Please refer to the OpenStructure website: "
207  "https://openstructure.org/docs/conop/compoundlib/.")
208  raise RuntimeError("No compound library found")
209 
210  pdb_entity = io.LoadPDB(pdb_path, fault_tolerant=fault_tolerant)
211  pdb_entity = CleanHydrogens(pdb_entity, clib)
212 
213  return pdb_entity
214 
215 
217  """ Scorer to compute various small molecule ligand (non polymer) scores.
218 
219  :class:`LigandScorer` is an abstract base class dealing with all the setup,
220  data storage, enumerating ligand symmetries and target/model ligand
221  matching/assignment. But actual score computation is delegated to child
222  classes.
223 
224  At the moment, two such classes are available:
225 
226  * :class:`ost.mol.alg.ligand_scoring_lddtpli.LDDTPLIScorer`
227  that assesses the conservation of protein-ligand
228  contacts (lDDT-PLI);
229  * :class:`ost.mol.alg.ligand_scoring_scrmsd.SCRMSDScorer`
230  that computes a binding-site superposed, symmetry-corrected RMSD
231  (BiSyRMSD) and ligand pocket lDDT (lDDT-LP).
232 
233  All versus all scores are available through the lazily computed
234  :attr:`score_matrix`. However, many things can go wrong... be it even
235  something as simple as two ligands not matching. Error states therefore
236  encode scoring issues. An Issue for a particular ligand is indicated by a
237  non-zero state in :attr:`model_ligand_states`/:attr:`target_ligand_states`.
238  This invalidates pairwise scores of such a ligand with all other ligands.
239  This and other issues in pairwise score computation are reported in
240  :attr:`state_matrix` which has the same size as :attr:`score_matrix`.
241  Only if the respective location is 0, a valid pairwise score can be
242  expected. The states and their meaning can be explored with code::
243 
244  for state_code, (short_desc, desc) in scorer_obj.state_decoding.items():
245  print(state_code)
246  print(short_desc)
247  print(desc)
248 
249  A common use case is to derive a one-to-one mapping between ligands in
250  the model and the target for which :class:`LigandScorer` provides an
251  automated :attr:`assignment` procedure.
252  By default, only exact matches between target and model ligands are
253  considered. This is a problem when the target only contains a subset
254  of the expected atoms (for instance if atoms are missing in an
255  experimental structure, which often happens in the PDB). With
256  `substructure_match=True`, complete model ligands can be scored against
257  partial target ligands. One problem with this approach is that it is
258  very easy to find good matches to small, irrelevant ligands like EDO, CO2
259  or GOL. The assignment algorithm therefore considers the coverage,
260  expressed as the fraction of atoms of the model ligand atoms covered in the
261  target. Higher coverage matches are prioritized, but a match with a better
262  score will be preferred if it falls within a window of `coverage_delta`
263  (by default 0.2) of a worse-scoring match. As a result, for instance,
264  with a delta of 0.2, a low-score match with coverage 0.96 would be
265  preferred over a high-score match with coverage 0.70.
266 
267  Assumptions:
268 
269  Unlike most of OpenStructure, this class does not assume that the ligands
270  (either for the model or the target) are part of the PDB component
271  dictionary. They may have arbitrary residue names. Residue names do not
272  have to match between the model and the target. Matching is based on
273  the calculation of isomorphisms which depend on the atom element name and
274  atom connectivity (bond order is ignored).
275  It is up to the caller to ensure that the connectivity of atoms is properly
276  set before passing any ligands to this class. Ligands with improper
277  connectivity will lead to bogus results.
278 
279  This only applies to the ligand. The rest of the model and target
280  structures (protein, nucleic acids) must still follow the usual rules and
281  contain only residues from the compound library. Structures are cleaned up
282  according to constructor documentation. We advise to
283  use the :func:`MMCIFPrep` and :func:`PDBPrep` for loading which already
284  clean hydrogens and, in the case of MMCIF, optionally extract ligands ready
285  to be used by the :class:`LigandScorer` based on "non-polymer" entity types.
286  In case of PDB file format, ligands must be loaded separately as SDF files.
287 
288  Here is an example of how to setup a scorer::
289 
290  from ost.mol.alg.ligand_scoring_scrmsd import SCRMSDScorer
291 
292  # Load data
293  # Structure model in PDB format, containing the receptor only
294  model = PDBPrep("path_to_model.pdb")
295  # Ligand model as SDF file
296  model_ligand = io.LoadEntity("path_to_ligand.sdf", format="sdf")
297  # Target loaded from mmCIF, containing the ligand
298  target, target_ligands = MMCIFPrep("path_to_target.cif",
299  extract_nonpoly=True)
300 
301  # Setup scorer object and compute SCRMSD
302  model_ligands = [model_ligand.Select("ele != H")]
303  sc = SCRMSDScorer(model, target, model_ligands, target_ligands)
304 
305  # Perform assignment and read respective scores
306  for lig_pair in sc.assignment:
307  trg_lig = sc.target_ligands[lig_pair[0]]
308  mdl_lig = sc.model_ligands[lig_pair[1]]
309  score = sc.score_matrix[lig_pair[0], lig_pair[1]]
310  print(f"Score for {trg_lig} and {mdl_lig}: {score}")
311 
312  # check cleanup in model and target structure:
313  print("model cleanup:", sc.model_cleanup_log)
314  print("target cleanup:", sc.target_cleanup_log)
315 
316  :param model: Model structure - a deep copy is available as :attr:`model`.
317  The model undergoes the following cleanup steps which are
318  dependent on :class:`ost.conop.CompoundLib` returned by
319  :func:`ost.conop.GetDefaultLib`: 1) removal
320  of hydrogens, 2) removal of residues for which there is no
321  entry in :class:`ost.conop.CompoundLib`, 3) removal of
322  residues that are not peptide linking or nucleotide linking
323  according to :class:`ost.conop.CompoundLib` 4) removal of
324  atoms that are not defined for respective residues in
325  :class:`ost.conop.CompoundLib`. Except step 1), every cleanup
326  is logged with :class:`ost.LogLevel` Warning and a report is
327  available as :attr:`model_cleanup_log`.
328  :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
329  :param target: Target structure - same processing as *model*.
330  :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
331  :param model_ligands: Model ligands, as a list of
332  :class:`ost.mol.ResidueHandle`/
333  :class:`ost.mol.ResidueView`/
334  :class:`ost.mol.EntityHandle`/
335  :class:`ost.mol.EntityView`. For
336  :class:`ost.mol.EntityHandle`/
337  :class:`ost.mol.EntityView`, each residue is
338  considered to be an individual ligand.
339  All ligands are copied into a separate
340  :class:`ost.mol.EntityHandle` available as
341  :attr:`model_ligand_ent` and the respective
342  list of ligands is available as :attr:`model_ligands`.
343  :type model_ligands: :class:`list`
344  :param target_ligands: Target ligands, same processing as model ligands.
345  :type target_ligands: :class:`list`
346  :param resnum_alignments: Whether alignments between chemically equivalent
347  chains in *model* and *target* can be computed
348  based on residue numbers. This can be assumed in
349  benchmarking setups such as CAMEO/CASP.
350  :type resnum_alignments: :class:`bool`
351  :param substructure_match: Set this to True to allow incomplete (i.e.
352  partially resolved) target ligands.
353  :type substructure_match: :class:`bool`
354  :param coverage_delta: the coverage delta for partial ligand assignment.
355  :type coverage_delta: :class:`float`
356  :param max_symmetries: If more than that many isomorphisms exist for
357  a target-ligand pair, it will be ignored and reported
358  as unassigned.
359  :type max_symmetries: :class:`int`
360  """
361 
362  def __init__(self, model, target, model_ligands, target_ligands,
363  resnum_alignments=False, substructure_match=False,
364  coverage_delta=0.2, max_symmetries=1e5,
365  rename_ligand_chain=False):
366 
367  if isinstance(model, mol.EntityView):
368  self._model_model = mol.CreateEntityFromView(model, False)
369  elif isinstance(model, mol.EntityHandle):
370  self._model_model = model.Copy()
371  else:
372  raise RuntimeError("model must be of type EntityView/EntityHandle")
373 
374  if isinstance(target, mol.EntityView):
375  self._target_target = mol.CreateEntityFromView(target, False)
376  elif isinstance(target, mol.EntityHandle):
377  self._target_target = target.Copy()
378  else:
379  raise RuntimeError("target must be of type EntityView/EntityHandle")
380 
381  clib = conop.GetDefaultLib()
382  if not clib:
383  ost.LogError("A compound library is required. "
384  "Please refer to the OpenStructure website: "
385  "https://openstructure.org/docs/conop/compoundlib/.")
386  raise RuntimeError("No compound library found")
387  self._target_target, self._target_cleanup_log_target_cleanup_log = \
388  self._cleanup_polymer_ent_cleanup_polymer_ent(self._target_target, clib)
389  self._model_model, self._model_cleanup_log_model_cleanup_log = \
390  self._cleanup_polymer_ent_cleanup_polymer_ent(self._model_model, clib)
391 
392  # keep ligands separate from polymer entities
393  self._target_ligand_ent_target_ligand_ent = mol.CreateEntity()
394  self._model_ligand_ent_model_ligand_ent = mol.CreateEntity()
395  target_ligand_ent_ed = self._target_ligand_ent_target_ligand_ent.EditXCS(mol.BUFFERED_EDIT)
396  model_ligand_ent_ed = self._model_ligand_ent_model_ligand_ent.EditXCS(mol.BUFFERED_EDIT)
397 
398  self._target_ligands_target_ligands = list()
399  for l in target_ligands:
400  if isinstance(l, mol.EntityView) or isinstance(l, mol.EntityHandle):
401  for r in l.residues:
402  self._target_ligands_target_ligands.append(self._copy_ligand_copy_ligand(r, self._target_ligand_ent_target_ligand_ent,
403  target_ligand_ent_ed,
404  rename_ligand_chain))
405  elif isinstance(l, mol.ResidueHandle) or isinstance(l, mol.ResidueView):
406  self._target_ligands_target_ligands.append(self._copy_ligand_copy_ligand(l, self._target_ligand_ent_target_ligand_ent,
407  target_ligand_ent_ed,
408  rename_ligand_chain))
409  else:
410  raise RuntimeError("ligands must be of type EntityView/"
411  "EntityHandle/ResidueView/ResidueHandle")
412 
413  self._model_ligands_model_ligands = list()
414  for l in model_ligands:
415  if isinstance(l, mol.EntityView) or isinstance(l, mol.EntityHandle):
416  for r in l.residues:
417  self._model_ligands_model_ligands.append(self._copy_ligand_copy_ligand(r, self._model_ligand_ent_model_ligand_ent,
418  model_ligand_ent_ed,
419  rename_ligand_chain))
420  elif isinstance(l, mol.ResidueHandle) or isinstance(l, mol.ResidueView):
421  self._model_ligands_model_ligands.append(self._copy_ligand_copy_ligand(l, self._model_ligand_ent_model_ligand_ent,
422  model_ligand_ent_ed,
423  rename_ligand_chain))
424  else:
425  raise RuntimeError("ligands must be of type EntityView/"
426  "EntityHandle/ResidueView/ResidueHandle")
427 
428 
429  if len(self.model_ligandsmodel_ligands) == 0:
430  LogWarning("No ligands in the model")
431  if len(self.target_ligandstarget_ligands) == 0:
432  raise ValueError("No ligand in the model and in the target")
433 
434  self._resnum_alignments_resnum_alignments = resnum_alignments
435  self._substructure_match_substructure_match = substructure_match
436  self._coverage_delta_coverage_delta = coverage_delta
437  self._max_symmetries_max_symmetries = max_symmetries
438 
439  # lazily computed attributes
440  self.__chain_mapper__chain_mapper = None
441 
442  # keep track of states
443  # simple integers instead of enums - encoding available in
444  # self.state_decoding
445  self._state_matrix_state_matrix = None
446  self._model_ligand_states_model_ligand_states = None
447  self._target_ligand_states_target_ligand_states = None
448 
449  # score matrices
450  self._score_matrix_score_matrix = None
451  self._coverage_matrix_coverage_matrix = None
452  self._aux_matrix_aux_matrix = None
453 
454  # assignment and derived data
455  self._assignment_assignment = None
456  self._score_dict_score_dict = None
457  self._aux_dict_aux_dict = None
458 
459  # human readable description of states - child class must extend with
460  # with child class specific states
461  # each state code comes with a tuple of two elements:
462  # 1) short description 2) human readable description
463  # The actual states are set in _compute_scores in :class:`LigandScorer`
464  # or _compute_score of the child class.
465  if self.substructure_matchsubstructure_match:
466  iso = "subgraph isomorphism"
467  else:
468  iso = "full graph isomorphism"
469 
470  self.state_decodingstate_decoding = \
471  {0: ("OK", "OK"),
472  1: ("identity", f"Ligands could not be matched (by {iso})"),
473  2: ("symmetries", "Too many symmetries between ligand atoms were "
474  "found - increasing max_symmetries might help"),
475  3: ("no_iso", "No full isomorphic match could be found - enabling "
476  "substructure_match might allow a match"),
477  4: ("disconnected", "Ligand graph is disconnected"),
478  5: ("stoichiometry", "Ligand was already assigned to another ligand "
479  "(different stoichiometry)"),
480  6: ("single_ligand_issue", "Cannot compute valid pairwise score as "
481  "either model or target ligand have non-zero state."),
482  9: ("unknown", "An unknown error occured in LigandScorer")}
483 
484 
485  @property
486  def model(self):
487  """ Model receptor structure
488 
489  Processed according to docs in :class:`LigandScorer` constructor
490  """
491  return self._model_model
492 
493  @property
494  def target(self):
495  """ Target receptor structure
496 
497  Processed according to docs in :class:`LigandScorer` constructor
498  """
499  return self._target_target
500 
501  @property
502  def model_cleanup_log(self):
503  """ Reports residues/atoms that were removed in model during cleanup
504 
505  Residues and atoms are described as :class:`str` in format
506  <chain_name>.<resnum>.<ins_code> (residue) and
507  <chain_name>.<resnum>.<ins_code>.<aname> (atom).
508 
509  :class:`dict` with keys:
510 
511  * 'cleaned_residues': another :class:`dict` with keys:
512 
513  * 'no_clib': residues that have been removed because no entry could be
514  found in :class:`ost.conop.CompoundLib`
515  * 'not_linking': residues that have been removed because they're not
516  peptide or nucleotide linking according to
517  :class:`ost.conop.CompoundLib`
518 
519  * 'cleaned_atoms': another :class:`dict` with keys:
520 
521  * 'unknown_atoms': atoms that have been removed as they're not part
522  of their respective residue according to
523  :class:`ost.conop.CompoundLib`
524  """
525  return self._model_cleanup_log_model_cleanup_log
526 
527  @property
529  """ Same for target
530  """
531  return self._target_cleanup_log_target_cleanup_log
532 
533  @property
534  def model_ligands(self):
535  """ Residues representing model ligands
536 
537  :class:`list` of :class:`ost.mol.ResidueHandle`
538  """
539  return self._model_ligands_model_ligands
540 
541  @property
542  def target_ligands(self):
543  """ Residues representing target ligands
544 
545  :class:`list` of :class:`ost.mol.ResidueHandle`
546  """
547  return self._target_ligands_target_ligands
548 
549  @property
550  def resnum_alignments(self):
551  """ Given at :class:`LigandScorer` construction
552  """
553  return self._resnum_alignments_resnum_alignments
554 
555  @property
557  """ Given at :class:`LigandScorer` construction
558  """
559  return self._substructure_match_substructure_match
560 
561  @property
562  def coverage_delta(self):
563  """ Given at :class:`LigandScorer` construction
564  """
565  return self._coverage_delta_coverage_delta
566 
567  @property
568  def max_symmetries(self):
569  """ Given at :class:`LigandScorer` construction
570  """
571  return self._max_symmetries_max_symmetries
572 
573  @property
574  def state_matrix(self):
575  """ Encodes states of ligand pairs
576 
577  Ligand pairs can be matched and a valid score can be expected if
578  respective location in this matrix is 0.
579  Target ligands are in rows, model ligands in columns. States are encoded
580  as integers <= 9. Larger numbers encode errors for child classes.
581  Use something like ``self.state_decoding[3]`` to get a decscription.
582 
583  :rtype: :class:`~numpy.ndarray`
584  """
585  if self._state_matrix_state_matrix is None:
586  self._compute_scores_compute_scores()
587  return self._state_matrix_state_matrix
588 
589  @property
591  """ Encodes states of model ligands
592 
593  Non-zero state in any of the model ligands invalidates the full
594  respective column in :attr:`~state_matrix`.
595 
596  :rtype: :class:`~numpy.ndarray`
597  """
598  if self._model_ligand_states_model_ligand_states is None:
599  self._compute_scores_compute_scores()
600  return self._model_ligand_states_model_ligand_states
601 
602  @property
604  """ Encodes states of target ligands
605 
606  Non-zero state in any of the target ligands invalidates the full
607  respective row in :attr:`~state_matrix`.
608 
609  :rtype: :class:`~numpy.ndarray`
610  """
611  if self._target_ligand_states_target_ligand_states is None:
612  self._compute_scores_compute_scores()
613  return self._target_ligand_states_target_ligand_states
614 
615  @property
616  def score_matrix(self):
617  """ Get the matrix of scores.
618 
619  Target ligands are in rows, model ligands in columns.
620 
621  NaN values indicate that no value could be computed (i.e. different
622  ligands). In other words: values are only valid if the respective
623  location in :attr:`~state_matrix` is 0.
624 
625  :rtype: :class:`~numpy.ndarray`
626  """
627  if self._score_matrix_score_matrix is None:
628  self._compute_scores_compute_scores()
629  return self._score_matrix_score_matrix
630 
631  @property
632  def coverage_matrix(self):
633  """ Get the matrix of model ligand atom coverage in the target.
634 
635  Target ligands are in rows, model ligands in columns.
636 
637  NaN values indicate that no value could be computed (i.e. different
638  ligands). In other words: values are only valid if the respective
639  location in :attr:`~state_matrix` is 0. If `substructure_match=False`,
640  only full match isomorphisms are considered, and therefore only values
641  of 1.0 can be observed.
642 
643  :rtype: :class:`~numpy.ndarray`
644  """
645  if self._coverage_matrix_coverage_matrix is None:
646  self._compute_scores_compute_scores()
647  return self._coverage_matrix_coverage_matrix
648 
649  @property
650  def aux_matrix(self):
651  """ Get the matrix of scorer specific auxiliary data.
652 
653  Target ligands are in rows, model ligands in columns.
654 
655  Auxiliary data consists of arbitrary data dicts which allow a child
656  class to provide additional information for a scored ligand pair.
657  empty dictionaries indicate that the child class simply didn't return
658  anything or that no value could be computed (e.g. different ligands).
659  In other words: values are only valid if respective location in the
660  :attr:`~state_matrix` is 0.
661 
662  :rtype: :class:`~numpy.ndarray`
663  """
664  if self._aux_matrix_aux_matrix is None:
665  self._compute_scores_compute_scores()
666  return self._aux_matrix_aux_matrix
667 
668  @property
669  def assignment(self):
670  """ Ligand assignment based on computed scores
671 
672  Implements a greedy algorithm to assign target and model ligands
673  with each other. Starts from each valid ligand pair as indicated
674  by a state of 0 in :attr:`state_matrix`. Each iteration first selects
675  high coverage pairs. Given max_coverage defined as the highest
676  coverage observed in the available pairs, all pairs with coverage
677  in [max_coverage-*coverage_delta*, max_coverage] are selected.
678  The best scoring pair among those is added to the assignment
679  and the whole process is repeated until there are no ligands to
680  assign anymore.
681 
682  :rtype: :class:`list` of :class:`tuple` (trg_lig_idx, mdl_lig_idx)
683  """
684  if self._assignment_assignment is None:
685  self._assignment_assignment = list()
686  # Build working array that contains tuples for all mdl/trg ligand
687  # pairs with valid score as indicated by a state of 0:
688  # (score, coverage, trg_ligand_idx, mdl_ligand_idx)
689  tmp = list()
690  for trg_idx in range(self.score_matrixscore_matrix.shape[0]):
691  for mdl_idx in range(self.score_matrixscore_matrix.shape[1]):
692  if self.state_matrixstate_matrix[trg_idx, mdl_idx] == 0:
693  tmp.append((self.score_matrixscore_matrix[trg_idx, mdl_idx],
694  self.coverage_matrixcoverage_matrix[trg_idx, mdl_idx],
695  trg_idx, mdl_idx))
696 
697  # sort by score, such that best scoring item is in front
698  if self._score_dir_score_dir() == '+':
699  tmp.sort(reverse=True)
700  elif self._score_dir_score_dir() == '-':
701  tmp.sort()
702  else:
703  raise RuntimeError("LigandScorer._score_dir must return one in "
704  "['+', '-']")
705 
706  LogScript("Computing ligand assignment")
707  while len(tmp) > 0:
708  # select high coverage ligand pairs in working array
709  coverage_thresh = max([x[1] for x in tmp]) - self.coverage_deltacoverage_delta
710  top_coverage = [x for x in tmp if x[1] >= coverage_thresh]
711 
712  # working array is sorted by score => just pick first one
713  a = top_coverage[0][2] # selected trg_ligand_idx
714  b = top_coverage[0][3] # selected mdl_ligand_idx
715  self._assignment_assignment.append((a, b))
716 
717  # kick out remaining pairs involving these ligands
718  tmp = [x for x in tmp if (x[2] != a and x[3] != b)]
719 
720  return self._assignment_assignment
721 
722  @property
723  def score(self):
724  """ Get a dictionary of score values, keyed by model ligand
725 
726  Extract score with something like:
727  ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
728  The returned scores are based on :attr:`~assignment`.
729 
730  :rtype: :class:`dict`
731  """
732  if self._score_dict_score_dict is None:
733  self._score_dict_score_dict = dict()
734  for (trg_lig_idx, mdl_lig_idx) in self.assignmentassignment:
735  mdl_lig = self.model_ligandsmodel_ligands[mdl_lig_idx]
736  cname = mdl_lig.GetChain().GetName()
737  rnum = mdl_lig.GetNumber()
738  if cname not in self._score_dict_score_dict:
739  self._score_dict_score_dict[cname] = dict()
740  score = self.score_matrixscore_matrix[trg_lig_idx, mdl_lig_idx]
741  self._score_dict_score_dict[cname][rnum] = score
742  return self._score_dict_score_dict
743 
744  @property
745  def aux(self):
746  """ Get a dictionary of score details, keyed by model ligand
747 
748  Extract dict with something like:
749  ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
750  The returned info dicts are based on :attr:`~assignment`. The content is
751  documented in the respective child class.
752 
753  :rtype: :class:`dict`
754  """
755  if self._aux_dict_aux_dict is None:
756  self._aux_dict_aux_dict = dict()
757  for (trg_lig_idx, mdl_lig_idx) in self.assignmentassignment:
758  mdl_lig = self.model_ligandsmodel_ligands[mdl_lig_idx]
759  cname = mdl_lig.GetChain().GetName()
760  rnum = mdl_lig.GetNumber()
761  if cname not in self._aux_dict_aux_dict:
762  self._aux_dict_aux_dict[cname] = dict()
763  d = self.aux_matrixaux_matrix[trg_lig_idx, mdl_lig_idx]
764  self._aux_dict_aux_dict[cname][rnum] = d
765  return self._aux_dict_aux_dict
766 
767 
768  @property
770  """ Get indices of target ligands which are not assigned
771 
772  :rtype: :class:`list` of :class:`int`
773  """
774  # compute on-the-fly, no need for caching
775  assigned = set([x[0] for x in self.assignmentassignment])
776  return [x for x in range(len(self.target_ligandstarget_ligands)) if x not in assigned]
777 
778  @property
780  """ Get indices of model ligands which are not assigned
781 
782  :rtype: :class:`list` of :class:`int`
783  """
784  # compute on-the-fly, no need for caching
785  assigned = set([x[1] for x in self.assignmentassignment])
786  return [x for x in range(len(self.model_ligandsmodel_ligands)) if x not in assigned]
787 
788  def get_target_ligand_state_report(self, trg_lig_idx):
789  """ Get summary of states observed with respect to all model ligands
790 
791  Mainly for debug purposes
792 
793  :param trg_lig_idx: Index of target ligand for which report should be
794  generated
795  :type trg_lig_idx: :class:`int`
796  """
797  return self._get_report_get_report(self.target_ligand_statestarget_ligand_states[trg_lig_idx],
798  self.state_matrixstate_matrix[trg_lig_idx,:])
799 
800  def get_model_ligand_state_report(self, mdl_lig_idx):
801  """ Get summary of states observed with respect to all target ligands
802 
803  Mainly for debug purposes
804 
805  :param mdl_lig_idx: Index of model ligand for which report should be
806  generated
807  :type mdl_lig_idx: :class:`int`
808  """
809  return self._get_report_get_report(self.model_ligand_statesmodel_ligand_states[mdl_lig_idx],
810  self.state_matrixstate_matrix[:, mdl_lig_idx])
811 
812  def _get_report(self, ligand_state, pair_states):
813  """ Helper
814  """
815  pair_report = list()
816  for s in np.unique(pair_states):
817  desc = self.state_decodingstate_decoding[s]
818  indices = np.flatnonzero(pair_states == s).tolist()
819  pair_report.append({"state": s,
820  "short desc": desc[0],
821  "desc": desc[1],
822  "indices": indices})
823 
824  desc = self.state_decodingstate_decoding[ligand_state]
825  ligand_report = {"state": ligand_state,
826  "short desc": desc[0],
827  "desc": desc[1]}
828 
829  return (ligand_report, pair_report)
830 
832  """ Makes an educated guess why target ligand is not assigned
833 
834  This either returns actual error states or custom states that are
835  derived from them. Currently, the following reasons are reported:
836 
837  * `no_ligand`: there was no ligand in the model.
838  * `disconnected`: the ligand graph was disconnected.
839  * `identity`: the ligand was not found in the model (by graph
840  isomorphism). Check your ligand connectivity.
841  * `no_iso`: no full isomorphic match could be found. Try enabling
842  `substructure_match=True` if the target ligand is incomplete.
843  * `symmetries`: too many symmetries were found (by graph isomorphisms).
844  Try to increase `max_symmetries`.
845  * `stoichiometry`: there was a possible assignment in the model, but
846  the model ligand was already assigned to a different target ligand.
847  This indicates different stoichiometries.
848  * `no_contact` (lDDT-PLI only): There were no lDDT contacts between
849  the binding site and the ligand, and lDDT-PLI is undefined.
850  * `target_binding_site` (SCRMSD only): no polymer residues were in
851  proximity of the target ligand.
852  * `model_binding_site` (SCRMSD only): the binding site was not found
853  in the model. Either the binding site was not modeled or the model
854  ligand was positioned too far in combination with
855  `full_bs_search=False`.
856 
857  :param trg_lig_idx: Index of target ligand
858  :type trg_lig_idx: :class:`int`
859  :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
860  sentence describing the issue, (\"unknown\",\"unknown\") if
861  nothing obvious can be found.
862  :raises: :class:`RuntimeError` if specified target ligand is assigned
863  """
864  if trg_lig_idx not in self.unassigned_target_ligandsunassigned_target_ligands:
865  raise RuntimeError("Specified target ligand is not unassigned")
866 
867  # hardcoded tuple if there is simply nothing we can assign it to
868  if len(self.model_ligandsmodel_ligands) == 0:
869  return ("no_ligand", "No ligand in the model")
870 
871  # if something with the ligand itself is wrong, we can be pretty sure
872  # thats why the ligand is unassigned
873  if self.target_ligand_statestarget_ligand_states[trg_lig_idx] != 0:
874  return self.state_decodingstate_decoding[self.target_ligand_statestarget_ligand_states[trg_lig_idx]]
875 
876  # The next best guess comes from looking at pair states
877  tmp = np.unique(self.state_matrixstate_matrix[trg_lig_idx,:])
878 
879  # In case of any 0, it could have been assigned so it's probably
880  # just not selected due to different stoichiometry - this is no
881  # defined state, we just return a hardcoded tuple in this case
882  if 0 in tmp:
883  return ("stoichiometry",
884  "Ligand was already assigned to an other "
885  "model ligand (different stoichiometry)")
886 
887  # maybe it's a symmetry issue?
888  if 2 in tmp:
889  return self.state_decodingstate_decoding[2]
890 
891  # if the state is 6 (single_ligand_issue), there is an issue with its
892  # target counterpart.
893  if 6 in tmp:
894  mdl_idx = np.where(self.state_matrixstate_matrix[trg_lig_idx,:]==6)[0]
895  for i in mdl_idx:
896  if self.model_ligand_statesmodel_ligand_states[i] == 0:
897  raise RuntimeError("This should never happen")
898  if self.model_ligand_statesmodel_ligand_states[i] != 4 or len(tmp) == 1:
899  # Don't report disconnected if only 1 model ligand is
900  # disconnected, unless that's the only reason
901  return self.state_decodingstate_decoding[self.model_ligand_statesmodel_ligand_states[i]]
902 
903  # get rid of remaining single ligand issues (only disconnected error)
904  if 6 in tmp and len(tmp) > 1:
905  tmp = tmp[tmp!=6]
906 
907  # prefer everything over identity state
908  if 1 in tmp and len(tmp) > 1:
909  tmp = tmp[tmp!=1]
910 
911  # just return whatever is left
912  return self.state_decodingstate_decoding[tmp[0]]
913 
914 
915  def guess_model_ligand_unassigned_reason(self, mdl_lig_idx):
916  """ Makes an educated guess why model ligand is not assigned
917 
918  This either returns actual error states or custom states that are
919  derived from them. Currently, the following reasons are reported:
920 
921  * `no_ligand`: there was no ligand in the target.
922  * `disconnected`: the ligand graph is disconnected.
923  * `identity`: the ligand was not found in the target (by graph or
924  subgraph isomorphism). Check your ligand connectivity.
925  * `no_iso`: no full isomorphic match could be found. Try enabling
926  `substructure_match=True` if the target ligand is incomplete.
927  * `symmetries`: too many symmetries were found (by graph isomorphisms).
928  Try to increase `max_symmetries`.
929  * `stoichiometry`: there was a possible assignment in the target, but
930  the model target was already assigned to a different model ligand.
931  This indicates different stoichiometries.
932  * `no_contact` (lDDT-PLI only): There were no lDDT contacts between
933  the binding site and the ligand, and lDDT-PLI is undefined.
934  * `target_binding_site` (SCRMSD only): a potential assignment was found
935  in the target, but there were no polymer residues in proximity of the
936  ligand in the target.
937  * `model_binding_site` (SCRMSD only): a potential assignment was
938  found in the target, but no binding site was found in the model.
939  Either the binding site was not modeled or the model ligand was
940  positioned too far in combination with `full_bs_search=False`.
941 
942  :param mdl_lig_idx: Index of model ligand
943  :type mdl_lig_idx: :class:`int`
944  :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
945  sentence describing the issue, (\"unknown\",\"unknown\") if
946  nothing obvious can be found.
947  :raises: :class:`RuntimeError` if specified model ligand is assigned
948  """
949  if mdl_lig_idx not in self.unassigned_model_ligandsunassigned_model_ligands:
950  raise RuntimeError("Specified model ligand is not unassigned")
951 
952  # hardcoded tuple if there is simply nothing we can assign it to
953  if len(self.target_ligandstarget_ligands) == 0:
954  return ("no_ligand", "No ligand in the target")
955 
956  # if something with the ligand itself is wrong, we can be pretty sure
957  # thats why the ligand is unassigned
958  if self.model_ligand_statesmodel_ligand_states[mdl_lig_idx] != 0:
959  return self.state_decodingstate_decoding[self.model_ligand_statesmodel_ligand_states[mdl_lig_idx]]
960 
961  # The next best guess comes from looking at pair states
962  tmp = np.unique(self.state_matrixstate_matrix[:,mdl_lig_idx])
963 
964  # In case of any 0, it could have been assigned so it's probably
965  # just not selected due to different stoichiometry - this is no
966  # defined state, we just return a hardcoded tuple in this case
967  if 0 in tmp:
968  return ("stoichiometry",
969  "Ligand was already assigned to an other "
970  "model ligand (different stoichiometry)")
971 
972  # maybe its a symmetry issue?
973  if 2 in tmp:
974  return self.state_decodingstate_decoding[2]
975 
976  # if the state is 6 (single_ligand_issue), there is an issue with its
977  # target counterpart.
978  if 6 in tmp:
979  trg_idx = np.where(self.state_matrixstate_matrix[:,mdl_lig_idx]==6)[0]
980  for i in trg_idx:
981  if self.target_ligand_statestarget_ligand_states[i] == 0:
982  raise RuntimeError("This should never happen")
983  if self.target_ligand_statestarget_ligand_states[i] != 4 or len(tmp) == 1:
984  # Don't report disconnected if only 1 target ligand is
985  # disconnected, unless that's the only reason
986  return self.state_decodingstate_decoding[self.target_ligand_statestarget_ligand_states[i]]
987 
988  # get rid of remaining single ligand issues (only disconnected error)
989  if 6 in tmp and len(tmp) > 1:
990  tmp = tmp[tmp!=6]
991 
992  # prefer everything over identity state
993  if 1 in tmp and len(tmp) > 1:
994  tmp = tmp[tmp!=1]
995 
996  # just return whatever is left
997  return self.state_decodingstate_decoding[tmp[0]]
998 
999  @property
1001  return_dict = dict()
1002  for i in self.unassigned_model_ligandsunassigned_model_ligands:
1003  lig = self.model_ligandsmodel_ligands[i]
1004  cname = lig.GetChain().GetName()
1005  rnum = lig.GetNumber()
1006  if cname not in return_dict:
1007  return_dict[cname] = dict()
1008  return_dict[cname][rnum] = \
1009  self.guess_model_ligand_unassigned_reasonguess_model_ligand_unassigned_reason(i)[0]
1010  return return_dict
1011 
1012  @property
1014  return_dict = dict()
1015  for i in self.unassigned_target_ligandsunassigned_target_ligands:
1016  lig = self.target_ligandstarget_ligands[i]
1017  cname = lig.GetChain().GetName()
1018  rnum = lig.GetNumber()
1019  if cname not in return_dict:
1020  return_dict[cname] = dict()
1021  return_dict[cname][rnum] = \
1022  self.guess_target_ligand_unassigned_reasonguess_target_ligand_unassigned_reason(i)[0]
1023  return return_dict
1024 
1025  @property
1026  def _chain_mapper(self):
1027  """ Chain mapper object for the given :attr:`target`.
1028 
1029  Can be used by child classes if needed, constructed with
1030  *resnum_alignments* flag
1031 
1032  :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
1033  """
1034  if self.__chain_mapper__chain_mapper is None:
1035  with _SinkVerbosityLevel():
1036  self.__chain_mapper__chain_mapper = \
1037  chain_mapping.ChainMapper(self.targettarget,
1038  n_max_naive=1e9,
1039  resnum_alignments=self.resnum_alignmentsresnum_alignments)
1040  return self.__chain_mapper__chain_mapper
1041 
1042  def _compute_scores(self):
1043  """
1044  Compute score for every possible target-model ligand pair and store the
1045  result in internal matrices.
1046  """
1047 
1050  shape = (len(self.target_ligandstarget_ligands), len(self.model_ligandsmodel_ligands))
1051  self._score_matrix_score_matrix = np.full(shape, np.nan, dtype=np.float32)
1052  self._coverage_matrix_coverage_matrix = np.full(shape, np.nan, dtype=np.float32)
1053  self._state_matrix_state_matrix = np.full(shape, 0, dtype=np.int32)
1054  self._model_ligand_states_model_ligand_states = np.zeros(len(self.model_ligandsmodel_ligands))
1055  self._target_ligand_states_target_ligand_states = np.zeros(len(self.target_ligandstarget_ligands))
1056  self._aux_matrix_aux_matrix = np.empty(shape, dtype=dict)
1057 
1058  # precompute ligand graphs
1059  target_graphs = \
1060  [_ResidueToGraph(l, by_atom_index=True) for l in self.target_ligandstarget_ligands]
1061  model_graphs = \
1062  [_ResidueToGraph(l, by_atom_index=True) for l in self.model_ligandsmodel_ligands]
1063 
1064  for g_idx, g in enumerate(target_graphs):
1065  if not networkx.is_connected(g):
1066  self._target_ligand_states_target_ligand_states[g_idx] = 4
1067  self._state_matrix_state_matrix[g_idx,:] = 6
1068  msg = "Disconnected graph observed for target ligand "
1069  msg += str(self.target_ligandstarget_ligands[g_idx])
1070  LogWarning(msg)
1071 
1072  for g_idx, g in enumerate(model_graphs):
1073  if not networkx.is_connected(g):
1074  self._model_ligand_states_model_ligand_states[g_idx] = 4
1075  self._state_matrix_state_matrix[:,g_idx] = 6
1076  msg = "Disconnected graph observed for model ligand "
1077  msg += str(self.model_ligandsmodel_ligands[g_idx])
1078  LogWarning(msg)
1079 
1080 
1081  LogScript("Computing pairwise scores for all %s x %s ligands" % shape)
1082  for target_id, target_ligand in enumerate(self.target_ligandstarget_ligands):
1083  LogInfo("Analyzing target ligand %s" % target_ligand)
1084 
1085  if self._target_ligand_states_target_ligand_states[target_id] == 4:
1086  # Disconnected graph - already updated states and reported
1087  # to LogVerbose
1088  continue
1089 
1090  for model_id, model_ligand in enumerate(self.model_ligandsmodel_ligands):
1091  LogInfo("Comparing to model ligand %s" % model_ligand)
1092 
1093 
1096 
1097  if self._model_ligand_states_model_ligand_states[model_id] == 4:
1098  # Disconnected graph - already updated states and reported
1099  # to LogVerbose
1100  continue
1101 
1102  try:
1103  symmetries = ComputeSymmetries(
1104  model_ligand, target_ligand,
1105  substructure_match=self.substructure_matchsubstructure_match,
1106  by_atom_index=True,
1107  max_symmetries=self.max_symmetriesmax_symmetries,
1108  model_graph = model_graphs[model_id],
1109  target_graph = target_graphs[target_id])
1110  LogInfo("Ligands %s and %s symmetry match" % (
1111  str(model_ligand), str(target_ligand)))
1112  except NoSymmetryError:
1113  # Ligands are different - skip
1114  LogInfo("No symmetry between %s and %s" % (
1115  str(model_ligand), str(target_ligand)))
1116  self._state_matrix_state_matrix[target_id, model_id] = 1
1117  continue
1118  except TooManySymmetriesError:
1119  # Ligands are too symmetrical - skip
1120  LogWarning("Too many symmetries between %s and %s" % (
1121  str(model_ligand), str(target_ligand)))
1122  self._state_matrix_state_matrix[target_id, model_id] = 2
1123  continue
1124  except NoIsomorphicSymmetryError:
1125  # Ligands are different - skip
1126  LogInfo("No isomorphic symmetry between %s and %s" % (
1127  str(model_ligand), str(target_ligand)))
1128  self._state_matrix_state_matrix[target_id, model_id] = 3
1129  continue
1130  except DisconnectedGraphError:
1131  # this should never happen as we guard against
1132  # DisconnectedGraphError when precomputing the graph
1133  LogError("Disconnected graph observed for %s and %s" % (
1134  str(model_ligand), str(target_ligand)))
1135  # just set both ligand states to 4
1136  self._model_ligand_states_model_ligand_states[model_id] = 4
1137  self._model_ligand_states_model_ligand_states[target_id] = 4
1138  self._state_matrix_state_matrix[target_id, model_id] = 6
1139  continue
1140 
1141 
1144  score, pair_state, target_ligand_state, model_ligand_state, aux\
1145  = self._compute_compute(symmetries, target_ligand, model_ligand)
1146 
1147 
1150 
1151  # Ensure that returned states are associated with a
1152  # description. This is a requirement when subclassing
1153  # LigandScorer => state_decoding dict from base class must
1154  # be modified in subclass constructor
1155  if pair_state not in self.state_decodingstate_decoding or \
1156  target_ligand_state not in self.state_decodingstate_decoding or \
1157  model_ligand_state not in self.state_decodingstate_decoding:
1158  raise RuntimeError(f"Subclass returned state "
1159  f"\"{state}\" for which no "
1160  f"description is available. Point "
1161  f"the developer of the used scorer "
1162  f"to this error message.")
1163 
1164  # if any of the ligand states is non-zero, this must be marked
1165  # by the scorer subclass by specifying a specific pair state
1166  if target_ligand_state != 0 and pair_state != 6:
1167  raise RuntimeError("Observed non-zero target-ligand "
1168  "state which must trigger certain "
1169  "pair state. Point the developer of "
1170  "the used scorer to this error message")
1171 
1172  if model_ligand_state != 0 and pair_state != 6:
1173  raise RuntimeError("Observed non-zero model-ligand "
1174  "state which must trigger certain "
1175  "pair state. Point the developer of "
1176  "the used scorer to this error message")
1177 
1178  self._state_matrix_state_matrix[target_id, model_id] = pair_state
1179  self._target_ligand_states_target_ligand_states[target_id] = target_ligand_state
1180  self._model_ligand_states_model_ligand_states[model_id] = model_ligand_state
1181  if pair_state == 0:
1182  if score is None or np.isnan(score):
1183  raise RuntimeError("LigandScorer returned invalid "
1184  "score despite 0 error state")
1185  # it's a valid score!
1186  self._score_matrix_score_matrix[target_id, model_id] = score
1187  cvg = len(symmetries[0][0]) / len(model_ligand.atoms)
1188  self._coverage_matrix_coverage_matrix[target_id, model_id] = cvg
1189  self._aux_matrix_aux_matrix[target_id, model_id] = aux
1190 
1191  def _compute(self, symmetries, target_ligand, model_ligand):
1192  """ Compute score for specified ligand pair - defined by child class
1193 
1194  Raises :class:`NotImplementedError` if not implemented by child class.
1195 
1196  :param symmetries: Defines symmetries between *target_ligand* and
1197  *model_ligand*. Return value of
1198  :func:`ComputeSymmetries`
1199  :type symmetries: :class:`list` of :class:`tuple` with two elements
1200  each: 1) :class:`list` of atom indices in
1201  *target_ligand* 2) :class:`list` of respective atom
1202  indices in *model_ligand*
1203  :param target_ligand: The target ligand
1204  :type target_ligand: :class:`ost.mol.ResidueHandle` or
1205  :class:`ost.mol.ResidueView`
1206  :param model_ligand: The model ligand
1207  :type model_ligand: :class:`ost.mol.ResidueHandle` or
1208  :class:`ost.mol.ResidueView`
1209 
1210  :returns: A :class:`tuple` with three elements: 1) a score
1211  (:class:`float`) 2) state (:class:`int`).
1212  3) auxiliary data for this ligand pair (:class:`dict`).
1213  If state is 0, the score and auxiliary data will be
1214  added to :attr:`~score_matrix` and :attr:`~aux_matrix` as well
1215  as the respective value in :attr:`~coverage_matrix`.
1216  Returned score must be valid in this case (not None/NaN).
1217  Child specific non-zero states must be >= 10.
1218  """
1219  raise NotImplementedError("_compute must be implemented by child "
1220  "class")
1221 
1222  def _score_dir(self):
1223  """ Return direction of score - defined by child class
1224 
1225  Relevant for ligand assignment. Must return a string in ['+', '-'].
1226  '+' for ascending scores, i.e. higher is better (lddt etc.)
1227  '-' for descending scores, i.e. lower is better (rmsd etc.)
1228  """
1229  raise NotImplementedError("_score_dir must be implemented by child "
1230  "class")
1231 
1232  def _copy_ligand(self, l, ent, ed, rename_ligand_chain):
1233  """ Copies ligand into entity and returns residue handle
1234  """
1235  new_chain = ent.FindChain(l.chain.name)
1236  if not new_chain.IsValid():
1237  new_chain = ed.InsertChain(l.chain.name)
1238  ed.SetChainType(new_chain, mol.ChainType.CHAINTYPE_NON_POLY)
1239  else:
1240  # Does a residue with the same name already exist?
1241  already_exists = new_chain.FindResidue(l.number).IsValid()
1242  if already_exists:
1243  if rename_ligand_chain:
1244  chain_ext = 2 # Extend the chain name by this
1245  while True:
1246  new_chain_name = \
1247  l.chain.name + "_" + str(chain_ext)
1248  new_chain = ent.FindChain(new_chain_name)
1249  if new_chain.IsValid():
1250  chain_ext += 1
1251  continue
1252  else:
1253  new_chain = \
1254  ed.InsertChain(new_chain_name)
1255  break
1256  LogInfo("Moved ligand residue %s to new chain %s" % (
1257  l.qualified_name, new_chain.name))
1258  else:
1259  msg = \
1260  "A residue number %s already exists in chain %s" % (
1261  l.number, l.chain.name)
1262  raise RuntimeError(msg)
1263 
1264  # Add the residue with its original residue number
1265  new_res = ed.AppendResidue(new_chain, l.name, l.number)
1266  # Add atoms
1267  for old_atom in l.atoms:
1268  ed.InsertAtom(new_res, old_atom.name, old_atom.pos,
1269  element=old_atom.element, occupancy=old_atom.occupancy,
1270  b_factor=old_atom.b_factor, is_hetatm=old_atom.is_hetatom)
1271  # Add bonds
1272  for old_atom in l.atoms:
1273  for old_bond in old_atom.bonds:
1274  new_first = new_res.FindAtom(old_bond.first.name)
1275  new_second = new_res.FindAtom(old_bond.second.name)
1276  ed.Connect(new_first, new_second, old_bond.bond_order)
1277  new_res.SetIsLigand(True)
1278  return new_res
1279 
1280  def _cleanup_polymer_ent(self, ent, clib):
1281  """ In principle molck light but logs LigandScorer specific warnings
1282 
1283  Only to be applied to polymer entity
1284 
1285  1) removes atoms with elements set to H or D (not logged as there is no
1286  effect on scoring)
1287  2) removes residues with no entry in component dictionary
1288  3) removes all residues that are not peptide_liking or
1289  nucleotide_linking according component dictionary
1290  4) removes unknown atoms according to component dictionary
1291  5) reruns processor
1292  """
1293 
1294  cleanup_log = {"cleaned_residues": {"no_clib": [],
1295  "not_linking": []},
1296  "cleaned_atoms": {"unknown_atoms": []}}
1297 
1298  # 1)
1299  hydrogen_sel = ent.Select("ele == H or ele == D")
1300  if hydrogen_sel.GetAtomCount() > 0:
1301  # just do, no logging as it has no effect on scoring
1302  ent = ost.mol.CreateEntityFromView(ent.Select(
1303  "ele != H and ele != D"), include_exlusive_atoms=False)
1304 
1305  # extract residues/atoms for 2), 3) and 4)
1306  not_in_clib = list()
1307  not_linking = list()
1308  unknown_atom = list()
1309 
1310  for r in ent.residues:
1311  comp = clib.FindCompound(r.GetName())
1312  if comp is None:
1313  not_in_clib.append(r)
1314  continue
1315  if not (comp.IsPeptideLinking() or comp.IsNucleotideLinking()):
1316  not_linking.append(r)
1317  continue
1318  comp_anames = set([a.name for a in comp.atom_specs])
1319  for a in r.atoms:
1320  if a.name not in comp_anames:
1321  unknown_atom.append(a)
1322 
1323  # 2)
1324  if len(not_in_clib) > 0:
1325  cleanup_log["cleaned_residues"]["no_clib"] = \
1326  [_QualifiedResidueNotation(r) for r in not_in_clib]
1327  msg = ("Expect all residues in receptor structure to be defined in "
1328  "compound library but this is not the case. "
1329  "Please refer to the OpenStructure website if an updated "
1330  "library is sufficient to solve the problem: "
1331  "https://openstructure.org/docs/conop/compoundlib/ "
1332  "These residues will not be considered for scoring (which "
1333  "may also affect pre-processing steps such as alignments): ")
1334  msg += ','.join(cleanup_log["cleaned_residues"]["no_clib"])
1335  ost.LogWarning(msg)
1336  for r in not_in_clib:
1337  r.SetIntProp("clib", 1)
1338  ent = ost.mol.CreateEntityFromView(ent.Select("grclib:0!=1"),
1339  include_exlusive_atoms=False)
1340 
1341  # 3)
1342  if len(not_linking) > 0:
1343  cleanup_log["cleaned_residues"]["not_linking"] = \
1344  [_QualifiedResidueNotation(r) for r in not_linking]
1345  msg = ("Expect all residues in receptor structure to be peptide "
1346  "linking or nucleotide linking according to the compound "
1347  "library but this is not the case. "
1348  "Please refer to the OpenStructure website if an updated "
1349  "library is sufficient to solve the problem: "
1350  "https://openstructure.org/docs/conop/compoundlib/ "
1351  "These residues will not be considered for scoring (which "
1352  "may also affect pre-processing steps such as alignments): ")
1353  msg += ','.join(cleanup_log["cleaned_residues"]["not_linking"])
1354  ost.LogWarning(msg)
1355  for r in not_linking:
1356  r.SetIntProp("linking", 1)
1357  ent = ost.mol.CreateEntityFromView(ent.Select("grlinking:0!=1"),
1358  include_exlusive_atoms=False)
1359 
1360  # 4)
1361  if len(unknown_atom) > 0:
1362  cleanup_log["cleaned_atoms"]["unknown_atoms"] = \
1363  [_QualifiedAtomNotation(a) for a in unknown_atom]
1364  msg = ("Expect atom names according to the compound library but "
1365  "this is not the case."
1366  "Please refer to the OpenStructure website if an updated "
1367  "library is sufficient to solve the problem: "
1368  "https://openstructure.org/docs/conop/compoundlib/ "
1369  "These atoms will not be considered for scoring: ")
1370  msg += ','.join(cleanup_log["cleaned_atoms"]["unknown_atoms"])
1371  ost.LogWarning(msg)
1372  for a in unknown_atom:
1373  a.SetIntProp("unknown", 1)
1374  ent = ost.mol.CreateEntityFromView(ent.Select("gaunknown:0!=1"),
1375  include_exlusive_atoms=False)
1376 
1377  # 5)
1378  processor = conop.RuleBasedProcessor(clib)
1379  processor.Process(ent)
1380 
1381  return (ent, cleanup_log)
1382 
1383 
1384 def _ResidueToGraph(residue, by_atom_index=False):
1385  """Return a NetworkX graph representation of the residue.
1386 
1387  :param residue: the residue from which to derive the graph
1388  :type residue: :class:`ost.mol.ResidueHandle` or
1389  :class:`ost.mol.ResidueView`
1390  :param by_atom_index: Set this parameter to True if you need the nodes to
1391  be labeled by atom index (within the residue).
1392  Otherwise, if False, the nodes will be labeled by
1393  atom names.
1394  :type by_atom_index: :class:`bool`
1395  :rtype: :class:`~networkx.classes.graph.Graph`
1396 
1397  Nodes are labeled with the Atom's uppercase
1398  :attr:`~ost.mol.AtomHandle.element`.
1399  """
1400  nxg = networkx.Graph()
1401 
1402  for atom in residue.atoms:
1403  nxg.add_node(atom.name, element=atom.element.upper())
1404 
1405  # This will list all edges twice - once for every atom of the pair.
1406  # But as of NetworkX 3.0 adding the same edge twice has no effect,
1407  # so we're good.
1408  nxg.add_edges_from([(
1409  b.first.name,
1410  b.second.name) for a in residue.atoms for b in a.GetBondList()])
1411 
1412  if by_atom_index:
1413  nxg = networkx.relabel_nodes(nxg,
1414  {a: b for a, b in zip(
1415  [a.name for a in residue.atoms],
1416  range(len(residue.atoms)))},
1417  True)
1418  return nxg
1419 
1420 def ComputeSymmetries(model_ligand, target_ligand, substructure_match=False,
1421  by_atom_index=False, return_symmetries=True,
1422  max_symmetries=1e6, model_graph = None,
1423  target_graph = None):
1424  """Return a list of symmetries (isomorphisms) of the model onto the target
1425  residues.
1426 
1427  :param model_ligand: The model ligand
1428  :type model_ligand: :class:`ost.mol.ResidueHandle` or
1429  :class:`ost.mol.ResidueView`
1430  :param target_ligand: The target ligand
1431  :type target_ligand: :class:`ost.mol.ResidueHandle` or
1432  :class:`ost.mol.ResidueView`
1433  :param substructure_match: Set this to True to allow partial ligands
1434  in the reference.
1435  :type substructure_match: :class:`bool`
1436  :param by_atom_index: Set this parameter to True if you need the symmetries
1437  to refer to atom index (within the residue).
1438  Otherwise, if False, the symmetries refer to atom
1439  names.
1440  :type by_atom_index: :class:`bool`
1441  :type return_symmetries: If Truthy, return the mappings, otherwise simply
1442  return True if a mapping is found (and raise if
1443  no mapping is found). This is useful to quickly
1444  find out if a mapping exist without the expensive
1445  step to find all the mappings.
1446  :type return_symmetries: :class:`bool`
1447  :param max_symmetries: If more than that many isomorphisms exist, raise
1448  a :class:`TooManySymmetriesError`. This can only be assessed by
1449  generating at least that many isomorphisms and can take some time.
1450  :type max_symmetries: :class:`int`
1451  :raises: :class:`NoSymmetryError` when no symmetry can be found;
1452  :class:`NoIsomorphicSymmetryError` in case of isomorphic
1453  subgraph but *substructure_match* is False;
1454  :class:`TooManySymmetriesError` when more than `max_symmetries`
1455  isomorphisms are found; :class:`DisconnectedGraphError` if
1456  graph for *model_ligand*/*target_ligand* is disconnected.
1457  """
1458 
1459  # Get the Graphs of the ligands
1460  if model_graph is None:
1461  model_graph = _ResidueToGraph(model_ligand,
1462  by_atom_index=by_atom_index)
1463 
1464  if not networkx.is_connected(model_graph):
1465  msg = "Disconnected graph for model ligand %s" % model_ligand
1466  raise DisconnectedGraphError(msg)
1467 
1468 
1469  if target_graph is None:
1470  target_graph = _ResidueToGraph(target_ligand,
1471  by_atom_index=by_atom_index)
1472 
1473  if not networkx.is_connected(target_graph):
1474  msg = "Disconnected graph for target ligand %s" % target_ligand
1475  raise DisconnectedGraphError(msg)
1476 
1477  # Note the argument order (model, target) which differs from spyrmsd.
1478  # This is because a subgraph of model is isomorphic to target - but not the
1479  # opposite as we only consider partial ligands in the reference.
1480  # Make sure to generate the symmetries correctly in the end
1481  gm = networkx.algorithms.isomorphism.GraphMatcher(
1482  model_graph, target_graph, node_match=lambda x, y:
1483  x["element"] == y["element"])
1484  if gm.is_isomorphic():
1485  if not return_symmetries:
1486  return True
1487  symmetries = []
1488  for i, isomorphism in enumerate(gm.isomorphisms_iter()):
1489  if i >= max_symmetries:
1490  raise TooManySymmetriesError(
1491  "Too many symmetries between %s and %s" % (
1492  str(model_ligand), str(target_ligand)))
1493  symmetries.append((list(isomorphism.values()),
1494  list(isomorphism.keys())))
1495  assert len(symmetries) > 0
1496  LogVerbose("Found %s isomorphic mappings (symmetries)" % len(symmetries))
1497  elif gm.subgraph_is_isomorphic() and substructure_match:
1498  if not return_symmetries:
1499  return True
1500  symmetries = []
1501  for i, isomorphism in enumerate(gm.subgraph_isomorphisms_iter()):
1502  if i >= max_symmetries:
1503  raise TooManySymmetriesError(
1504  "Too many symmetries between %s and %s" % (
1505  str(model_ligand), str(target_ligand)))
1506  symmetries.append((list(isomorphism.values()),
1507  list(isomorphism.keys())))
1508  assert len(symmetries) > 0
1509  # Assert that all the atoms in the target are part of the substructure
1510  assert len(symmetries[0][0]) == len(target_ligand.atoms)
1511  n_sym = len(symmetries)
1512  LogVerbose("Found %s subgraph isomorphisms (symmetries)" % n_sym)
1513  elif gm.subgraph_is_isomorphic():
1514  LogVerbose("Found subgraph isomorphisms (symmetries), but"
1515  " ignoring because substructure_match=False")
1516  raise NoIsomorphicSymmetryError("No symmetry between %s and %s" % (
1517  str(model_ligand), str(target_ligand)))
1518  else:
1519  LogVerbose("Found no isomorphic mappings (symmetries)")
1520  raise NoSymmetryError("No symmetry between %s and %s" % (
1521  str(model_ligand), str(target_ligand)))
1522 
1523  return symmetries
1524 
1525 class NoSymmetryError(ValueError):
1526  """ Exception raised when no symmetry can be found.
1527  """
1528  pass
1529 
1530 class NoIsomorphicSymmetryError(ValueError):
1531  """ Exception raised when no isomorphic symmetry can be found
1532 
1533  There would be isomorphic subgraphs for which symmetries can
1534  be found, but substructure_match is disabled
1535  """
1536  pass
1537 
1538 class TooManySymmetriesError(ValueError):
1539  """ Exception raised when too many symmetries are found.
1540  """
1541  pass
1542 
1543 class DisconnectedGraphError(Exception):
1544  """ Exception raised when the ligand graph is disconnected.
1545  """
1546  pass
1547 
1548 # specify public interface
1549 __all__ = ('CleanHydrogens', 'MMCIFPrep', 'PDBPrep',
1550  'LigandScorer', 'ComputeSymmetries', 'NoSymmetryError',
1551  'NoIsomorphicSymmetryError', 'TooManySymmetriesError',
1552  'DisconnectedGraphError')
Protein or molecule.
definition of EntityView
Definition: entity_view.hh:86
def _copy_ligand(self, l, ent, ed, rename_ligand_chain)
def _compute(self, symmetries, target_ligand, model_ligand)
def __init__(self, model, target, model_ligands, target_ligands, resnum_alignments=False, substructure_match=False, coverage_delta=0.2, max_symmetries=1e5, rename_ligand_chain=False)
def _get_report(self, ligand_state, pair_states)
def PopVerbosityLevel()
Definition: __init__.py:232
def PushVerbosityLevel(value)
Definition: __init__.py:229
def MMCIFPrep(mmcif_path, biounit=None, extract_nonpoly=False, fault_tolerant=False)
def PDBPrep(pdb_path, fault_tolerant=False)
def ComputeSymmetries(model_ligand, target_ligand, substructure_match=False, by_atom_index=False, return_symmetries=True, max_symmetries=1e6, model_graph=None, target_graph=None)
EntityHandle DLLEXPORT_OST_MOL CreateEntityFromView(const EntityView &view, bool include_exlusive_atoms, EntityHandle handle=EntityHandle())
create new entity handle from entity view