1 from contextlib
import contextmanager
9 from ost
import LogWarning, LogScript, LogInfo, LogVerbose, LogDebug, GetVerbosityLevel, PushVerbosityLevel, PopVerbosityLevel
14 def _SinkVerbosityLevel(n=1):
15 """ Context manager to temporarily reduce the verbosity level by n.
18 with _SinkVerbosityLevel(2):
20 Will only write "Test" in script level (2 above)
31 def _QualifiedAtomNotation(a):
32 """Return a parsable string of the atom in the format:
33 ChainName.ResidueNumber.InsertionCode.AtomName."""
34 resnum = a.residue.number
35 return "{cname}.{rnum}.{ins_code}.{aname}".format(
38 ins_code=resnum.ins_code.strip(
"\u0000"),
43 def _QualifiedResidueNotation(r):
44 """Return a parsable string of the residue in the format:
45 ChainName.ResidueNumber.InsertionCode."""
47 return "{cname}.{rnum}.{ins_code}".format(
50 ins_code=resnum.ins_code.strip(
"\u0000"),
55 """ Ligand scoring helper - Returns copy of *ent* without hydrogens
57 Non-standard hydrogen naming can cause trouble in residue property
58 assignment which is done by the :class:`ost.conop.RuleBasedProcessor` when
59 loading. In fact, residue property assignment is not done for every residue
60 that has unknown atoms according to the chemical component dictionary. This
61 function therefore re-processes the entity after removing hydrogens.
63 :param ent: Entity to clean
64 :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
65 :param clib: Compound library to perform re-processing after hydrogen
67 :type clib: :class:`ost.conop.CompoundLib`
68 :returns: Cleaned and re-processed ent
70 cleaned_ent = mol.CreateEntityFromView(ent.Select(
71 "ele != H and ele != D"), include_exlusive_atoms=
False)
75 processor.Process(cleaned_ent)
79 def MMCIFPrep(mmcif_path, biounit=None, extract_nonpoly=False,
80 fault_tolerant=False):
81 """ Ligand scoring helper - Prepares :class:`LigandScorer` input from mmCIF
83 Only performs gentle cleanup of hydrogen atoms. Further cleanup is delegated
84 to :class:`LigandScorer`.
86 :param mmcif_path: Path to mmCIF file that contains polymer and optionally
88 :type mmcif_path: :class:`str`
89 :param biounit: If given, construct specified biounit from mmCIF AU
90 :type biounit: :class:`str`
91 :param extract_nonpoly: Additionally returns a list of
92 :class:`ost.mol.EntityHandle`
93 objects representing all non-polymer (ligand)
95 :type extract_nonpoly: :class:`bool`
96 :param fault_tolerant: Passed as parameter to :func:`ost.io.LoadMMCIF`
97 :type fault_tolerant: :class:`bool`
98 :returns: :class:`ost.mol.EntityHandle` which only contains polymer
99 entities representing the receptor structure. If *extract_nonpoly*
100 is True, a tuple is returned which additionally contains a
101 :class:`list` of :class:`ost.mol.EntityHandle`, where each
102 :class:`ost.mol.EntityHandle` represents a non-polymer (ligand).
104 clib = conop.GetDefaultLib()
106 ost.LogError(
"A compound library is required. "
107 "Please refer to the OpenStructure website: "
108 "https://openstructure.org/docs/conop/compoundlib/.")
109 raise RuntimeError(
"No compound library found")
111 mmcif_entity, mmcif_info = io.LoadMMCIF(mmcif_path, info=
True,
112 fault_tolerant=fault_tolerant)
116 polymer_entity_ids = mmcif_info.GetEntityIdsOfType(
"polymer")
117 polymer_chain_names = list()
118 for ch
in mmcif_entity.chains:
119 if mmcif_info.GetMMCifEntityIdTr(ch.name)
in polymer_entity_ids:
120 polymer_chain_names.append(ch.name)
123 non_polymer_entity_ids = mmcif_info.GetEntityIdsOfType(
"non-polymer")
124 non_polymer_chain_names = list()
125 for ch
in mmcif_entity.chains:
126 if mmcif_info.GetMMCifEntityIdTr(ch.name)
in non_polymer_entity_ids:
127 non_polymer_chain_names.append(ch.name)
130 if biounit
is not None:
131 biounit_found =
False
132 for bu
in mmcif_info.biounits:
134 mmcif_entity = mol.alg.CreateBU(mmcif_entity, bu)
137 if not biounit_found:
138 raise RuntimeError(f
"Specified biounit '{biounit}' not in "
143 for ch
in mmcif_entity.chains:
145 if biounit
is not None:
149 dot_index = ch.name.find(
'.')
153 cname = ch.name[dot_index+1:]
157 if cname
in polymer_chain_names:
158 ch.SetIntProp(
"poly", 1)
159 if cname
in non_polymer_chain_names:
160 ch.SetIntProp(
"nonpolyid", non_poly_id)
163 poly_sel = mmcif_entity.Select(
"gcpoly:0=1")
164 poly_ent = mol.CreateEntityFromView(poly_sel,
True)
166 if extract_nonpoly ==
False:
169 non_poly_sel = mmcif_entity.Select(
"gcnonpoly:0=1")
170 non_poly_entities = list()
171 for i
in range(non_poly_id):
172 view = mmcif_entity.Select(f
"gcnonpolyid:{non_poly_id}={i}")
173 if view.GetResidueCount() != 1:
174 raise RuntimeError(f
"Expect non-polymer entities in "
175 f
"{mmcif_path} to contain exactly 1 "
176 f
"residue. Got {ch.GetResidueCount()} "
177 f
"in chain {ch.name}")
178 compound = clib.FindCompound(view.residues[0].name)
180 raise RuntimeError(f
"Can only extract non-polymer entities if "
181 f
"respective residues are available in PDB "
182 f
"component dictionary. Can't find "
183 f
"\"{view.residues[0].name}\"")
185 non_poly_entities.append(mol.CreateEntityFromView(view,
True))
187 return (poly_ent, non_poly_entities)
191 """ Ligand scoring helper - Prepares :class:`LigandScorer` input from PDB
193 Only performs gentle cleanup of hydrogen atoms. Further cleanup is delegated
194 to :class:`LigandScorer`. There is no logic to extract ligands from PDB
195 files. Ligands must be provided separately as SDF files in these cases.
197 :param pdb_path: Path to PDB file that contains polymer entities
198 :type pdb_path: :class:`str`
199 :param fault_tolerant: Passed as parameter to :func:`ost.io.LoadPDB`
200 :type fault_tolerant: :class:`bool`
201 :returns: :class:`EntityHandle` from loaded file.
203 clib = conop.GetDefaultLib()
205 ost.LogError(
"A compound library is required. "
206 "Please refer to the OpenStructure website: "
207 "https://openstructure.org/docs/conop/compoundlib/.")
208 raise RuntimeError(
"No compound library found")
210 pdb_entity = io.LoadPDB(pdb_path, fault_tolerant=fault_tolerant)
217 """ Scorer to compute various small molecule ligand (non polymer) scores.
219 :class:`LigandScorer` is an abstract base class dealing with all the setup,
220 data storage, enumerating ligand symmetries and target/model ligand
221 matching/assignment. But actual score computation is delegated to child
224 At the moment, two such classes are available:
226 * :class:`ost.mol.alg.ligand_scoring_lddtpli.LDDTPLIScorer`
227 that assesses the conservation of protein-ligand
229 * :class:`ost.mol.alg.ligand_scoring_scrmsd.SCRMSDScorer`
230 that computes a binding-site superposed, symmetry-corrected RMSD
231 (BiSyRMSD) and ligand pocket lDDT (lDDT-LP).
233 All versus all scores are available through the lazily computed
234 :attr:`score_matrix`. However, many things can go wrong... be it even
235 something as simple as two ligands not matching. Error states therefore
236 encode scoring issues. An Issue for a particular ligand is indicated by a
237 non-zero state in :attr:`model_ligand_states`/:attr:`target_ligand_states`.
238 This invalidates pairwise scores of such a ligand with all other ligands.
239 This and other issues in pairwise score computation are reported in
240 :attr:`state_matrix` which has the same size as :attr:`score_matrix`.
241 Only if the respective location is 0, a valid pairwise score can be
242 expected. The states and their meaning can be explored with code::
244 for state_code, (short_desc, desc) in scorer_obj.state_decoding.items():
249 A common use case is to derive a one-to-one mapping between ligands in
250 the model and the target for which :class:`LigandScorer` provides an
251 automated :attr:`assignment` procedure.
252 By default, only exact matches between target and model ligands are
253 considered. This is a problem when the target only contains a subset
254 of the expected atoms (for instance if atoms are missing in an
255 experimental structure, which often happens in the PDB). With
256 `substructure_match=True`, complete model ligands can be scored against
257 partial target ligands. One problem with this approach is that it is
258 very easy to find good matches to small, irrelevant ligands like EDO, CO2
259 or GOL. The assignment algorithm therefore considers the coverage,
260 expressed as the fraction of atoms of the model ligand atoms covered in the
261 target. Higher coverage matches are prioritized, but a match with a better
262 score will be preferred if it falls within a window of `coverage_delta`
263 (by default 0.2) of a worse-scoring match. As a result, for instance,
264 with a delta of 0.2, a low-score match with coverage 0.96 would be
265 preferred over a high-score match with coverage 0.70.
269 Unlike most of OpenStructure, this class does not assume that the ligands
270 (either for the model or the target) are part of the PDB component
271 dictionary. They may have arbitrary residue names. Residue names do not
272 have to match between the model and the target. Matching is based on
273 the calculation of isomorphisms which depend on the atom element name and
274 atom connectivity (bond order is ignored).
275 It is up to the caller to ensure that the connectivity of atoms is properly
276 set before passing any ligands to this class. Ligands with improper
277 connectivity will lead to bogus results.
279 This only applies to the ligand. The rest of the model and target
280 structures (protein, nucleic acids) must still follow the usual rules and
281 contain only residues from the compound library. Structures are cleaned up
282 according to constructor documentation. We advise to
283 use the :func:`MMCIFPrep` and :func:`PDBPrep` for loading which already
284 clean hydrogens and, in the case of MMCIF, optionally extract ligands ready
285 to be used by the :class:`LigandScorer` based on "non-polymer" entity types.
286 In case of PDB file format, ligands must be loaded separately as SDF files.
288 Here is an example of how to setup a scorer::
290 from ost.mol.alg.ligand_scoring_scrmsd import SCRMSDScorer
293 # Structure model in PDB format, containing the receptor only
294 model = PDBPrep("path_to_model.pdb")
295 # Ligand model as SDF file
296 model_ligand = io.LoadEntity("path_to_ligand.sdf", format="sdf")
297 # Target loaded from mmCIF, containing the ligand
298 target, target_ligands = MMCIFPrep("path_to_target.cif",
299 extract_nonpoly=True)
301 # Setup scorer object and compute SCRMSD
302 model_ligands = [model_ligand.Select("ele != H")]
303 sc = SCRMSDScorer(model, target, model_ligands, target_ligands)
305 # Perform assignment and read respective scores
306 for lig_pair in sc.assignment:
307 trg_lig = sc.target_ligands[lig_pair[0]]
308 mdl_lig = sc.model_ligands[lig_pair[1]]
309 score = sc.score_matrix[lig_pair[0], lig_pair[1]]
310 print(f"Score for {trg_lig} and {mdl_lig}: {score}")
312 # check cleanup in model and target structure:
313 print("model cleanup:", sc.model_cleanup_log)
314 print("target cleanup:", sc.target_cleanup_log)
316 :param model: Model structure - a deep copy is available as :attr:`model`.
317 The model undergoes the following cleanup steps which are
318 dependent on :class:`ost.conop.CompoundLib` returned by
319 :func:`ost.conop.GetDefaultLib`: 1) removal
320 of hydrogens, 2) removal of residues for which there is no
321 entry in :class:`ost.conop.CompoundLib`, 3) removal of
322 residues that are not peptide linking or nucleotide linking
323 according to :class:`ost.conop.CompoundLib` 4) removal of
324 atoms that are not defined for respective residues in
325 :class:`ost.conop.CompoundLib`. Except step 1), every cleanup
326 is logged with :class:`ost.LogLevel` Warning and a report is
327 available as :attr:`model_cleanup_log`.
328 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
329 :param target: Target structure - same processing as *model*.
330 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
331 :param model_ligands: Model ligands, as a list of
332 :class:`ost.mol.ResidueHandle`/
333 :class:`ost.mol.ResidueView`/
334 :class:`ost.mol.EntityHandle`/
335 :class:`ost.mol.EntityView`. For
336 :class:`ost.mol.EntityHandle`/
337 :class:`ost.mol.EntityView`, each residue is
338 considered to be an individual ligand.
339 All ligands are copied into a separate
340 :class:`ost.mol.EntityHandle` available as
341 :attr:`model_ligand_ent` and the respective
342 list of ligands is available as :attr:`model_ligands`.
343 :type model_ligands: :class:`list`
344 :param target_ligands: Target ligands, same processing as model ligands.
345 :type target_ligands: :class:`list`
346 :param resnum_alignments: Whether alignments between chemically equivalent
347 chains in *model* and *target* can be computed
348 based on residue numbers. This can be assumed in
349 benchmarking setups such as CAMEO/CASP.
350 :type resnum_alignments: :class:`bool`
351 :param substructure_match: Set this to True to allow incomplete (i.e.
352 partially resolved) target ligands.
353 :type substructure_match: :class:`bool`
354 :param coverage_delta: the coverage delta for partial ligand assignment.
355 :type coverage_delta: :class:`float`
356 :param max_symmetries: If more than that many isomorphisms exist for
357 a target-ligand pair, it will be ignored and reported
359 :type max_symmetries: :class:`int`
362 def __init__(self, model, target, model_ligands, target_ligands,
363 resnum_alignments=False, substructure_match=False,
364 coverage_delta=0.2, max_symmetries=1e5,
365 rename_ligand_chain=False):
368 self.
_model_model = mol.CreateEntityFromView(model,
False)
370 self.
_model_model = model.Copy()
372 raise RuntimeError(
"model must be of type EntityView/EntityHandle")
375 self.
_target_target = mol.CreateEntityFromView(target,
False)
377 self.
_target_target = target.Copy()
379 raise RuntimeError(
"target must be of type EntityView/EntityHandle")
381 clib = conop.GetDefaultLib()
383 ost.LogError(
"A compound library is required. "
384 "Please refer to the OpenStructure website: "
385 "https://openstructure.org/docs/conop/compoundlib/.")
386 raise RuntimeError(
"No compound library found")
395 target_ligand_ent_ed = self.
_target_ligand_ent_target_ligand_ent.EditXCS(mol.BUFFERED_EDIT)
396 model_ligand_ent_ed = self.
_model_ligand_ent_model_ligand_ent.EditXCS(mol.BUFFERED_EDIT)
399 for l
in target_ligands:
403 target_ligand_ent_ed,
404 rename_ligand_chain))
407 target_ligand_ent_ed,
408 rename_ligand_chain))
410 raise RuntimeError(
"ligands must be of type EntityView/"
411 "EntityHandle/ResidueView/ResidueHandle")
414 for l
in model_ligands:
419 rename_ligand_chain))
423 rename_ligand_chain))
425 raise RuntimeError(
"ligands must be of type EntityView/"
426 "EntityHandle/ResidueView/ResidueHandle")
430 LogWarning(
"No ligands in the model")
432 raise ValueError(
"No ligand in the model and in the target")
466 iso =
"subgraph isomorphism"
468 iso =
"full graph isomorphism"
472 1: (
"identity", f
"Ligands could not be matched (by {iso})"),
473 2: (
"symmetries",
"Too many symmetries between ligand atoms were "
474 "found - increasing max_symmetries might help"),
475 3: (
"no_iso",
"No full isomorphic match could be found - enabling "
476 "substructure_match might allow a match"),
477 4: (
"disconnected",
"Ligand graph is disconnected"),
478 5: (
"stoichiometry",
"Ligand was already assigned to another ligand "
479 "(different stoichiometry)"),
480 6: (
"single_ligand_issue",
"Cannot compute valid pairwise score as "
481 "either model or target ligand have non-zero state."),
482 9: (
"unknown",
"An unknown error occured in LigandScorer")}
487 """ Model receptor structure
489 Processed according to docs in :class:`LigandScorer` constructor
495 """ Target receptor structure
497 Processed according to docs in :class:`LigandScorer` constructor
503 """ Reports residues/atoms that were removed in model during cleanup
505 Residues and atoms are described as :class:`str` in format
506 <chain_name>.<resnum>.<ins_code> (residue) and
507 <chain_name>.<resnum>.<ins_code>.<aname> (atom).
509 :class:`dict` with keys:
511 * 'cleaned_residues': another :class:`dict` with keys:
513 * 'no_clib': residues that have been removed because no entry could be
514 found in :class:`ost.conop.CompoundLib`
515 * 'not_linking': residues that have been removed because they're not
516 peptide or nucleotide linking according to
517 :class:`ost.conop.CompoundLib`
519 * 'cleaned_atoms': another :class:`dict` with keys:
521 * 'unknown_atoms': atoms that have been removed as they're not part
522 of their respective residue according to
523 :class:`ost.conop.CompoundLib`
535 """ Residues representing model ligands
537 :class:`list` of :class:`ost.mol.ResidueHandle`
543 """ Residues representing target ligands
545 :class:`list` of :class:`ost.mol.ResidueHandle`
551 """ Given at :class:`LigandScorer` construction
557 """ Given at :class:`LigandScorer` construction
563 """ Given at :class:`LigandScorer` construction
569 """ Given at :class:`LigandScorer` construction
575 """ Encodes states of ligand pairs
577 Ligand pairs can be matched and a valid score can be expected if
578 respective location in this matrix is 0.
579 Target ligands are in rows, model ligands in columns. States are encoded
580 as integers <= 9. Larger numbers encode errors for child classes.
581 Use something like ``self.state_decoding[3]`` to get a decscription.
583 :rtype: :class:`~numpy.ndarray`
591 """ Encodes states of model ligands
593 Non-zero state in any of the model ligands invalidates the full
594 respective column in :attr:`~state_matrix`.
596 :rtype: :class:`~numpy.ndarray`
604 """ Encodes states of target ligands
606 Non-zero state in any of the target ligands invalidates the full
607 respective row in :attr:`~state_matrix`.
609 :rtype: :class:`~numpy.ndarray`
617 """ Get the matrix of scores.
619 Target ligands are in rows, model ligands in columns.
621 NaN values indicate that no value could be computed (i.e. different
622 ligands). In other words: values are only valid if the respective
623 location in :attr:`~state_matrix` is 0.
625 :rtype: :class:`~numpy.ndarray`
633 """ Get the matrix of model ligand atom coverage in the target.
635 Target ligands are in rows, model ligands in columns.
637 NaN values indicate that no value could be computed (i.e. different
638 ligands). In other words: values are only valid if the respective
639 location in :attr:`~state_matrix` is 0. If `substructure_match=False`,
640 only full match isomorphisms are considered, and therefore only values
641 of 1.0 can be observed.
643 :rtype: :class:`~numpy.ndarray`
651 """ Get the matrix of scorer specific auxiliary data.
653 Target ligands are in rows, model ligands in columns.
655 Auxiliary data consists of arbitrary data dicts which allow a child
656 class to provide additional information for a scored ligand pair.
657 empty dictionaries indicate that the child class simply didn't return
658 anything or that no value could be computed (e.g. different ligands).
659 In other words: values are only valid if respective location in the
660 :attr:`~state_matrix` is 0.
662 :rtype: :class:`~numpy.ndarray`
670 """ Ligand assignment based on computed scores
672 Implements a greedy algorithm to assign target and model ligands
673 with each other. Starts from each valid ligand pair as indicated
674 by a state of 0 in :attr:`state_matrix`. Each iteration first selects
675 high coverage pairs. Given max_coverage defined as the highest
676 coverage observed in the available pairs, all pairs with coverage
677 in [max_coverage-*coverage_delta*, max_coverage] are selected.
678 The best scoring pair among those is added to the assignment
679 and the whole process is repeated until there are no ligands to
682 :rtype: :class:`list` of :class:`tuple` (trg_lig_idx, mdl_lig_idx)
690 for trg_idx
in range(self.
score_matrixscore_matrix.shape[0]):
691 for mdl_idx
in range(self.
score_matrixscore_matrix.shape[1]):
692 if self.
state_matrixstate_matrix[trg_idx, mdl_idx] == 0:
693 tmp.append((self.
score_matrixscore_matrix[trg_idx, mdl_idx],
699 tmp.sort(reverse=
True)
703 raise RuntimeError(
"LigandScorer._score_dir must return one in "
706 LogScript(
"Computing ligand assignment")
709 coverage_thresh = max([x[1]
for x
in tmp]) - self.
coverage_deltacoverage_delta
710 top_coverage = [x
for x
in tmp
if x[1] >= coverage_thresh]
713 a = top_coverage[0][2]
714 b = top_coverage[0][3]
718 tmp = [x
for x
in tmp
if (x[2] != a
and x[3] != b)]
724 """ Get a dictionary of score values, keyed by model ligand
726 Extract score with something like:
727 ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
728 The returned scores are based on :attr:`~assignment`.
730 :rtype: :class:`dict`
734 for (trg_lig_idx, mdl_lig_idx)
in self.
assignmentassignment:
736 cname = mdl_lig.GetChain().GetName()
737 rnum = mdl_lig.GetNumber()
740 score = self.
score_matrixscore_matrix[trg_lig_idx, mdl_lig_idx]
746 """ Get a dictionary of score details, keyed by model ligand
748 Extract dict with something like:
749 ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
750 The returned info dicts are based on :attr:`~assignment`. The content is
751 documented in the respective child class.
753 :rtype: :class:`dict`
757 for (trg_lig_idx, mdl_lig_idx)
in self.
assignmentassignment:
759 cname = mdl_lig.GetChain().GetName()
760 rnum = mdl_lig.GetNumber()
763 d = self.
aux_matrixaux_matrix[trg_lig_idx, mdl_lig_idx]
770 """ Get indices of target ligands which are not assigned
772 :rtype: :class:`list` of :class:`int`
775 assigned = set([x[0]
for x
in self.
assignmentassignment])
776 return [x
for x
in range(len(self.
target_ligandstarget_ligands))
if x
not in assigned]
780 """ Get indices of model ligands which are not assigned
782 :rtype: :class:`list` of :class:`int`
785 assigned = set([x[1]
for x
in self.
assignmentassignment])
786 return [x
for x
in range(len(self.
model_ligandsmodel_ligands))
if x
not in assigned]
789 """ Get summary of states observed with respect to all model ligands
791 Mainly for debug purposes
793 :param trg_lig_idx: Index of target ligand for which report should be
795 :type trg_lig_idx: :class:`int`
801 """ Get summary of states observed with respect to all target ligands
803 Mainly for debug purposes
805 :param mdl_lig_idx: Index of model ligand for which report should be
807 :type mdl_lig_idx: :class:`int`
812 def _get_report(self, ligand_state, pair_states):
816 for s
in np.unique(pair_states):
818 indices = np.flatnonzero(pair_states == s).tolist()
819 pair_report.append({
"state": s,
820 "short desc": desc[0],
825 ligand_report = {
"state": ligand_state,
826 "short desc": desc[0],
829 return (ligand_report, pair_report)
832 """ Makes an educated guess why target ligand is not assigned
834 This either returns actual error states or custom states that are
835 derived from them. Currently, the following reasons are reported:
837 * `no_ligand`: there was no ligand in the model.
838 * `disconnected`: the ligand graph was disconnected.
839 * `identity`: the ligand was not found in the model (by graph
840 isomorphism). Check your ligand connectivity.
841 * `no_iso`: no full isomorphic match could be found. Try enabling
842 `substructure_match=True` if the target ligand is incomplete.
843 * `symmetries`: too many symmetries were found (by graph isomorphisms).
844 Try to increase `max_symmetries`.
845 * `stoichiometry`: there was a possible assignment in the model, but
846 the model ligand was already assigned to a different target ligand.
847 This indicates different stoichiometries.
848 * `no_contact` (lDDT-PLI only): There were no lDDT contacts between
849 the binding site and the ligand, and lDDT-PLI is undefined.
850 * `target_binding_site` (SCRMSD only): no polymer residues were in
851 proximity of the target ligand.
852 * `model_binding_site` (SCRMSD only): the binding site was not found
853 in the model. Either the binding site was not modeled or the model
854 ligand was positioned too far in combination with
855 `full_bs_search=False`.
857 :param trg_lig_idx: Index of target ligand
858 :type trg_lig_idx: :class:`int`
859 :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
860 sentence describing the issue, (\"unknown\",\"unknown\") if
861 nothing obvious can be found.
862 :raises: :class:`RuntimeError` if specified target ligand is assigned
865 raise RuntimeError(
"Specified target ligand is not unassigned")
869 return (
"no_ligand",
"No ligand in the model")
877 tmp = np.unique(self.
state_matrixstate_matrix[trg_lig_idx,:])
883 return (
"stoichiometry",
884 "Ligand was already assigned to an other "
885 "model ligand (different stoichiometry)")
894 mdl_idx = np.where(self.
state_matrixstate_matrix[trg_lig_idx,:]==6)[0]
897 raise RuntimeError(
"This should never happen")
904 if 6
in tmp
and len(tmp) > 1:
908 if 1
in tmp
and len(tmp) > 1:
916 """ Makes an educated guess why model ligand is not assigned
918 This either returns actual error states or custom states that are
919 derived from them. Currently, the following reasons are reported:
921 * `no_ligand`: there was no ligand in the target.
922 * `disconnected`: the ligand graph is disconnected.
923 * `identity`: the ligand was not found in the target (by graph or
924 subgraph isomorphism). Check your ligand connectivity.
925 * `no_iso`: no full isomorphic match could be found. Try enabling
926 `substructure_match=True` if the target ligand is incomplete.
927 * `symmetries`: too many symmetries were found (by graph isomorphisms).
928 Try to increase `max_symmetries`.
929 * `stoichiometry`: there was a possible assignment in the target, but
930 the model target was already assigned to a different model ligand.
931 This indicates different stoichiometries.
932 * `no_contact` (lDDT-PLI only): There were no lDDT contacts between
933 the binding site and the ligand, and lDDT-PLI is undefined.
934 * `target_binding_site` (SCRMSD only): a potential assignment was found
935 in the target, but there were no polymer residues in proximity of the
936 ligand in the target.
937 * `model_binding_site` (SCRMSD only): a potential assignment was
938 found in the target, but no binding site was found in the model.
939 Either the binding site was not modeled or the model ligand was
940 positioned too far in combination with `full_bs_search=False`.
942 :param mdl_lig_idx: Index of model ligand
943 :type mdl_lig_idx: :class:`int`
944 :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
945 sentence describing the issue, (\"unknown\",\"unknown\") if
946 nothing obvious can be found.
947 :raises: :class:`RuntimeError` if specified model ligand is assigned
950 raise RuntimeError(
"Specified model ligand is not unassigned")
954 return (
"no_ligand",
"No ligand in the target")
962 tmp = np.unique(self.
state_matrixstate_matrix[:,mdl_lig_idx])
968 return (
"stoichiometry",
969 "Ligand was already assigned to an other "
970 "model ligand (different stoichiometry)")
979 trg_idx = np.where(self.
state_matrixstate_matrix[:,mdl_lig_idx]==6)[0]
982 raise RuntimeError(
"This should never happen")
989 if 6
in tmp
and len(tmp) > 1:
993 if 1
in tmp
and len(tmp) > 1:
1001 return_dict = dict()
1004 cname = lig.GetChain().GetName()
1005 rnum = lig.GetNumber()
1006 if cname
not in return_dict:
1007 return_dict[cname] = dict()
1008 return_dict[cname][rnum] = \
1014 return_dict = dict()
1017 cname = lig.GetChain().GetName()
1018 rnum = lig.GetNumber()
1019 if cname
not in return_dict:
1020 return_dict[cname] = dict()
1021 return_dict[cname][rnum] = \
1026 def _chain_mapper(self):
1027 """ Chain mapper object for the given :attr:`target`.
1029 Can be used by child classes if needed, constructed with
1030 *resnum_alignments* flag
1032 :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
1035 with _SinkVerbosityLevel():
1042 def _compute_scores(self):
1044 Compute score for every possible target-model ligand pair and store the
1045 result in internal matrices.
1051 self.
_score_matrix_score_matrix = np.full(shape, np.nan, dtype=np.float32)
1052 self.
_coverage_matrix_coverage_matrix = np.full(shape, np.nan, dtype=np.float32)
1053 self.
_state_matrix_state_matrix = np.full(shape, 0, dtype=np.int32)
1056 self.
_aux_matrix_aux_matrix = np.empty(shape, dtype=dict)
1060 [_ResidueToGraph(l, by_atom_index=
True)
for l
in self.
target_ligandstarget_ligands]
1062 [_ResidueToGraph(l, by_atom_index=
True)
for l
in self.
model_ligandsmodel_ligands]
1064 for g_idx, g
in enumerate(target_graphs):
1065 if not networkx.is_connected(g):
1068 msg =
"Disconnected graph observed for target ligand "
1072 for g_idx, g
in enumerate(model_graphs):
1073 if not networkx.is_connected(g):
1076 msg =
"Disconnected graph observed for model ligand "
1081 LogScript(
"Computing pairwise scores for all %s x %s ligands" % shape)
1082 for target_id, target_ligand
in enumerate(self.
target_ligandstarget_ligands):
1083 LogInfo(
"Analyzing target ligand %s" % target_ligand)
1090 for model_id, model_ligand
in enumerate(self.
model_ligandsmodel_ligands):
1091 LogInfo(
"Comparing to model ligand %s" % model_ligand)
1104 model_ligand, target_ligand,
1108 model_graph = model_graphs[model_id],
1109 target_graph = target_graphs[target_id])
1110 LogInfo(
"Ligands %s and %s symmetry match" % (
1111 str(model_ligand), str(target_ligand)))
1112 except NoSymmetryError:
1114 LogInfo(
"No symmetry between %s and %s" % (
1115 str(model_ligand), str(target_ligand)))
1118 except TooManySymmetriesError:
1120 LogWarning(
"Too many symmetries between %s and %s" % (
1121 str(model_ligand), str(target_ligand)))
1124 except NoIsomorphicSymmetryError:
1126 LogInfo(
"No isomorphic symmetry between %s and %s" % (
1127 str(model_ligand), str(target_ligand)))
1130 except DisconnectedGraphError:
1133 LogError(
"Disconnected graph observed for %s and %s" % (
1134 str(model_ligand), str(target_ligand)))
1144 score, pair_state, target_ligand_state, model_ligand_state, aux\
1145 = self.
_compute_compute(symmetries, target_ligand, model_ligand)
1156 target_ligand_state
not in self.
state_decodingstate_decoding
or \
1158 raise RuntimeError(f
"Subclass returned state "
1159 f
"\"{state}\" for which no "
1160 f
"description is available. Point "
1161 f
"the developer of the used scorer "
1162 f
"to this error message.")
1166 if target_ligand_state != 0
and pair_state != 6:
1167 raise RuntimeError(
"Observed non-zero target-ligand "
1168 "state which must trigger certain "
1169 "pair state. Point the developer of "
1170 "the used scorer to this error message")
1172 if model_ligand_state != 0
and pair_state != 6:
1173 raise RuntimeError(
"Observed non-zero model-ligand "
1174 "state which must trigger certain "
1175 "pair state. Point the developer of "
1176 "the used scorer to this error message")
1178 self.
_state_matrix_state_matrix[target_id, model_id] = pair_state
1182 if score
is None or np.isnan(score):
1183 raise RuntimeError(
"LigandScorer returned invalid "
1184 "score despite 0 error state")
1186 self.
_score_matrix_score_matrix[target_id, model_id] = score
1187 cvg = len(symmetries[0][0]) / len(model_ligand.atoms)
1189 self.
_aux_matrix_aux_matrix[target_id, model_id] = aux
1191 def _compute(self, symmetries, target_ligand, model_ligand):
1192 """ Compute score for specified ligand pair - defined by child class
1194 Raises :class:`NotImplementedError` if not implemented by child class.
1196 :param symmetries: Defines symmetries between *target_ligand* and
1197 *model_ligand*. Return value of
1198 :func:`ComputeSymmetries`
1199 :type symmetries: :class:`list` of :class:`tuple` with two elements
1200 each: 1) :class:`list` of atom indices in
1201 *target_ligand* 2) :class:`list` of respective atom
1202 indices in *model_ligand*
1203 :param target_ligand: The target ligand
1204 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1205 :class:`ost.mol.ResidueView`
1206 :param model_ligand: The model ligand
1207 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1208 :class:`ost.mol.ResidueView`
1210 :returns: A :class:`tuple` with three elements: 1) a score
1211 (:class:`float`) 2) state (:class:`int`).
1212 3) auxiliary data for this ligand pair (:class:`dict`).
1213 If state is 0, the score and auxiliary data will be
1214 added to :attr:`~score_matrix` and :attr:`~aux_matrix` as well
1215 as the respective value in :attr:`~coverage_matrix`.
1216 Returned score must be valid in this case (not None/NaN).
1217 Child specific non-zero states must be >= 10.
1219 raise NotImplementedError(
"_compute must be implemented by child "
1222 def _score_dir(self):
1223 """ Return direction of score - defined by child class
1225 Relevant for ligand assignment. Must return a string in ['+', '-'].
1226 '+' for ascending scores, i.e. higher is better (lddt etc.)
1227 '-' for descending scores, i.e. lower is better (rmsd etc.)
1229 raise NotImplementedError(
"_score_dir must be implemented by child "
1232 def _copy_ligand(self, l, ent, ed, rename_ligand_chain):
1233 """ Copies ligand into entity and returns residue handle
1235 new_chain = ent.FindChain(l.chain.name)
1236 if not new_chain.IsValid():
1237 new_chain = ed.InsertChain(l.chain.name)
1238 ed.SetChainType(new_chain, mol.ChainType.CHAINTYPE_NON_POLY)
1241 already_exists = new_chain.FindResidue(l.number).IsValid()
1243 if rename_ligand_chain:
1247 l.chain.name +
"_" + str(chain_ext)
1248 new_chain = ent.FindChain(new_chain_name)
1249 if new_chain.IsValid():
1254 ed.InsertChain(new_chain_name)
1256 LogInfo(
"Moved ligand residue %s to new chain %s" % (
1257 l.qualified_name, new_chain.name))
1260 "A residue number %s already exists in chain %s" % (
1261 l.number, l.chain.name)
1262 raise RuntimeError(msg)
1265 new_res = ed.AppendResidue(new_chain, l.name, l.number)
1267 for old_atom
in l.atoms:
1268 ed.InsertAtom(new_res, old_atom.name, old_atom.pos,
1269 element=old_atom.element, occupancy=old_atom.occupancy,
1270 b_factor=old_atom.b_factor, is_hetatm=old_atom.is_hetatom)
1272 for old_atom
in l.atoms:
1273 for old_bond
in old_atom.bonds:
1274 new_first = new_res.FindAtom(old_bond.first.name)
1275 new_second = new_res.FindAtom(old_bond.second.name)
1276 ed.Connect(new_first, new_second, old_bond.bond_order)
1277 new_res.SetIsLigand(
True)
1280 def _cleanup_polymer_ent(self, ent, clib):
1281 """ In principle molck light but logs LigandScorer specific warnings
1283 Only to be applied to polymer entity
1285 1) removes atoms with elements set to H or D (not logged as there is no
1287 2) removes residues with no entry in component dictionary
1288 3) removes all residues that are not peptide_liking or
1289 nucleotide_linking according component dictionary
1290 4) removes unknown atoms according to component dictionary
1294 cleanup_log = {
"cleaned_residues": {
"no_clib": [],
1296 "cleaned_atoms": {
"unknown_atoms": []}}
1299 hydrogen_sel = ent.Select(
"ele == H or ele == D")
1300 if hydrogen_sel.GetAtomCount() > 0:
1303 "ele != H and ele != D"), include_exlusive_atoms=
False)
1306 not_in_clib = list()
1307 not_linking = list()
1308 unknown_atom = list()
1310 for r
in ent.residues:
1311 comp = clib.FindCompound(r.GetName())
1313 not_in_clib.append(r)
1315 if not (comp.IsPeptideLinking()
or comp.IsNucleotideLinking()):
1316 not_linking.append(r)
1318 comp_anames = set([a.name
for a
in comp.atom_specs])
1320 if a.name
not in comp_anames:
1321 unknown_atom.append(a)
1324 if len(not_in_clib) > 0:
1325 cleanup_log[
"cleaned_residues"][
"no_clib"] = \
1326 [_QualifiedResidueNotation(r)
for r
in not_in_clib]
1327 msg = (
"Expect all residues in receptor structure to be defined in "
1328 "compound library but this is not the case. "
1329 "Please refer to the OpenStructure website if an updated "
1330 "library is sufficient to solve the problem: "
1331 "https://openstructure.org/docs/conop/compoundlib/ "
1332 "These residues will not be considered for scoring (which "
1333 "may also affect pre-processing steps such as alignments): ")
1334 msg +=
','.join(cleanup_log[
"cleaned_residues"][
"no_clib"])
1336 for r
in not_in_clib:
1337 r.SetIntProp(
"clib", 1)
1339 include_exlusive_atoms=
False)
1342 if len(not_linking) > 0:
1343 cleanup_log[
"cleaned_residues"][
"not_linking"] = \
1344 [_QualifiedResidueNotation(r)
for r
in not_linking]
1345 msg = (
"Expect all residues in receptor structure to be peptide "
1346 "linking or nucleotide linking according to the compound "
1347 "library but this is not the case. "
1348 "Please refer to the OpenStructure website if an updated "
1349 "library is sufficient to solve the problem: "
1350 "https://openstructure.org/docs/conop/compoundlib/ "
1351 "These residues will not be considered for scoring (which "
1352 "may also affect pre-processing steps such as alignments): ")
1353 msg +=
','.join(cleanup_log[
"cleaned_residues"][
"not_linking"])
1355 for r
in not_linking:
1356 r.SetIntProp(
"linking", 1)
1358 include_exlusive_atoms=
False)
1361 if len(unknown_atom) > 0:
1362 cleanup_log[
"cleaned_atoms"][
"unknown_atoms"] = \
1363 [_QualifiedAtomNotation(a)
for a
in unknown_atom]
1364 msg = (
"Expect atom names according to the compound library but "
1365 "this is not the case."
1366 "Please refer to the OpenStructure website if an updated "
1367 "library is sufficient to solve the problem: "
1368 "https://openstructure.org/docs/conop/compoundlib/ "
1369 "These atoms will not be considered for scoring: ")
1370 msg +=
','.join(cleanup_log[
"cleaned_atoms"][
"unknown_atoms"])
1372 for a
in unknown_atom:
1373 a.SetIntProp(
"unknown", 1)
1375 include_exlusive_atoms=
False)
1379 processor.Process(ent)
1381 return (ent, cleanup_log)
1384 def _ResidueToGraph(residue, by_atom_index=False):
1385 """Return a NetworkX graph representation of the residue.
1387 :param residue: the residue from which to derive the graph
1388 :type residue: :class:`ost.mol.ResidueHandle` or
1389 :class:`ost.mol.ResidueView`
1390 :param by_atom_index: Set this parameter to True if you need the nodes to
1391 be labeled by atom index (within the residue).
1392 Otherwise, if False, the nodes will be labeled by
1394 :type by_atom_index: :class:`bool`
1395 :rtype: :class:`~networkx.classes.graph.Graph`
1397 Nodes are labeled with the Atom's uppercase
1398 :attr:`~ost.mol.AtomHandle.element`.
1400 nxg = networkx.Graph()
1402 for atom
in residue.atoms:
1403 nxg.add_node(atom.name, element=atom.element.upper())
1408 nxg.add_edges_from([(
1410 b.second.name)
for a
in residue.atoms
for b
in a.GetBondList()])
1413 nxg = networkx.relabel_nodes(nxg,
1414 {a: b
for a, b
in zip(
1415 [a.name
for a
in residue.atoms],
1416 range(len(residue.atoms)))},
1421 by_atom_index=False, return_symmetries=True,
1422 max_symmetries=1e6, model_graph = None,
1423 target_graph = None):
1424 """Return a list of symmetries (isomorphisms) of the model onto the target
1427 :param model_ligand: The model ligand
1428 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1429 :class:`ost.mol.ResidueView`
1430 :param target_ligand: The target ligand
1431 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1432 :class:`ost.mol.ResidueView`
1433 :param substructure_match: Set this to True to allow partial ligands
1435 :type substructure_match: :class:`bool`
1436 :param by_atom_index: Set this parameter to True if you need the symmetries
1437 to refer to atom index (within the residue).
1438 Otherwise, if False, the symmetries refer to atom
1440 :type by_atom_index: :class:`bool`
1441 :type return_symmetries: If Truthy, return the mappings, otherwise simply
1442 return True if a mapping is found (and raise if
1443 no mapping is found). This is useful to quickly
1444 find out if a mapping exist without the expensive
1445 step to find all the mappings.
1446 :type return_symmetries: :class:`bool`
1447 :param max_symmetries: If more than that many isomorphisms exist, raise
1448 a :class:`TooManySymmetriesError`. This can only be assessed by
1449 generating at least that many isomorphisms and can take some time.
1450 :type max_symmetries: :class:`int`
1451 :raises: :class:`NoSymmetryError` when no symmetry can be found;
1452 :class:`NoIsomorphicSymmetryError` in case of isomorphic
1453 subgraph but *substructure_match* is False;
1454 :class:`TooManySymmetriesError` when more than `max_symmetries`
1455 isomorphisms are found; :class:`DisconnectedGraphError` if
1456 graph for *model_ligand*/*target_ligand* is disconnected.
1460 if model_graph
is None:
1461 model_graph = _ResidueToGraph(model_ligand,
1462 by_atom_index=by_atom_index)
1464 if not networkx.is_connected(model_graph):
1465 msg =
"Disconnected graph for model ligand %s" % model_ligand
1469 if target_graph
is None:
1470 target_graph = _ResidueToGraph(target_ligand,
1471 by_atom_index=by_atom_index)
1473 if not networkx.is_connected(target_graph):
1474 msg =
"Disconnected graph for target ligand %s" % target_ligand
1481 gm = networkx.algorithms.isomorphism.GraphMatcher(
1482 model_graph, target_graph, node_match=
lambda x, y:
1483 x[
"element"] == y[
"element"])
1484 if gm.is_isomorphic():
1485 if not return_symmetries:
1488 for i, isomorphism
in enumerate(gm.isomorphisms_iter()):
1489 if i >= max_symmetries:
1491 "Too many symmetries between %s and %s" % (
1492 str(model_ligand), str(target_ligand)))
1493 symmetries.append((list(isomorphism.values()),
1494 list(isomorphism.keys())))
1495 assert len(symmetries) > 0
1496 LogVerbose(
"Found %s isomorphic mappings (symmetries)" % len(symmetries))
1497 elif gm.subgraph_is_isomorphic()
and substructure_match:
1498 if not return_symmetries:
1501 for i, isomorphism
in enumerate(gm.subgraph_isomorphisms_iter()):
1502 if i >= max_symmetries:
1504 "Too many symmetries between %s and %s" % (
1505 str(model_ligand), str(target_ligand)))
1506 symmetries.append((list(isomorphism.values()),
1507 list(isomorphism.keys())))
1508 assert len(symmetries) > 0
1510 assert len(symmetries[0][0]) == len(target_ligand.atoms)
1511 n_sym = len(symmetries)
1512 LogVerbose(
"Found %s subgraph isomorphisms (symmetries)" % n_sym)
1513 elif gm.subgraph_is_isomorphic():
1514 LogVerbose(
"Found subgraph isomorphisms (symmetries), but"
1515 " ignoring because substructure_match=False")
1517 str(model_ligand), str(target_ligand)))
1519 LogVerbose(
"Found no isomorphic mappings (symmetries)")
1521 str(model_ligand), str(target_ligand)))
1526 """ Exception raised when no symmetry can be found.
1530 class NoIsomorphicSymmetryError(ValueError):
1531 """ Exception raised when no isomorphic symmetry can be found
1533 There would be isomorphic subgraphs for which symmetries can
1534 be found, but substructure_match is disabled
1539 """ Exception raised when too many symmetries are found.
1544 """ Exception raised when the ligand graph is disconnected.
1549 __all__ = (
'CleanHydrogens',
'MMCIFPrep',
'PDBPrep',
1550 'LigandScorer',
'ComputeSymmetries',
'NoSymmetryError',
1551 'NoIsomorphicSymmetryError',
'TooManySymmetriesError',
1552 'DisconnectedGraphError')
def substructure_match(self)
def _copy_ligand(self, l, ent, ed, rename_ligand_chain)
def _compute_scores(self)
def coverage_matrix(self)
def guess_target_ligand_unassigned_reason(self, trg_lig_idx)
def _compute(self, symmetries, target_ligand, model_ligand)
def resnum_alignments(self)
def unassigned_target_ligands(self)
def unassigned_model_ligands_reasons(self)
def model_ligand_states(self)
def unassigned_model_ligands(self)
def target_ligand_states(self)
def __init__(self, model, target, model_ligands, target_ligands, resnum_alignments=False, substructure_match=False, coverage_delta=0.2, max_symmetries=1e5, rename_ligand_chain=False)
def _cleanup_polymer_ent(self, ent, clib)
def model_cleanup_log(self)
def get_target_ligand_state_report(self, trg_lig_idx)
def get_model_ligand_state_report(self, mdl_lig_idx)
def _get_report(self, ligand_state, pair_states)
def guess_model_ligand_unassigned_reason(self, mdl_lig_idx)
def target_cleanup_log(self)
def unassigned_target_ligands_reasons(self)
def PushVerbosityLevel(value)
def MMCIFPrep(mmcif_path, biounit=None, extract_nonpoly=False, fault_tolerant=False)
def PDBPrep(pdb_path, fault_tolerant=False)
def CleanHydrogens(ent, clib)
def ComputeSymmetries(model_ligand, target_ligand, substructure_match=False, by_atom_index=False, return_symmetries=True, max_symmetries=1e6, model_graph=None, target_graph=None)
EntityHandle DLLEXPORT_OST_MOL CreateEntityFromView(const EntityView &view, bool include_exlusive_atoms, EntityHandle handle=EntityHandle())
create new entity handle from entity view