1from contextlib
import contextmanager
8from ost
import LogWarning, LogScript, LogInfo, LogVerbose, LogDebug, GetVerbosityLevel, PushVerbosityLevel, PopVerbosityLevel
13 """ Context manager to temporarily reduce the verbosity level by n.
16 with _SinkVerbosityLevel(2):
18 Will only write "Test" in script level (2 above)
20 PushVerbosityLevel(GetVerbosityLevel() - n)
30 """Return a parsable string of the atom in the format:
31 ChainName.ResidueNumber.InsertionCode.AtomName."""
32 resnum = a.residue.number
33 return "{cname}.{rnum}.{ins_code}.{aname}".format(
36 ins_code=resnum.ins_code.strip(
"\u0000"),
42 """Return a parsable string of the residue in the format:
43 ChainName.ResidueNumber.InsertionCode."""
45 return "{cname}.{rnum}.{ins_code}".format(
48 ins_code=resnum.ins_code.strip(
"\u0000"),
52 """ Scorer to compute various small molecule ligand (non polymer) scores.
54 :class:`LigandScorer` is an abstract base class dealing with all the setup,
55 data storage, enumerating ligand symmetries and target/model ligand
56 matching/assignment. But actual score computation is delegated to child
59 At the moment, two such classes are available:
61 * :class:`ost.mol.alg.ligand_scoring_lddtpli.LDDTPLIScorer`
62 that assesses the conservation of protein-ligand
64 * :class:`ost.mol.alg.ligand_scoring_scrmsd.SCRMSDScorer`
65 that computes a binding-site superposed, symmetry-corrected RMSD
66 (BiSyRMSD) and ligand pocket LDDT (LDDT-LP).
68 All versus all scores are available through the lazily computed
69 :attr:`score_matrix`. However, many things can go wrong... be it even
70 something as simple as two ligands not matching. Error states therefore
71 encode scoring issues. An Issue for a particular ligand is indicated by a
72 non-zero state in :attr:`model_ligand_states`/:attr:`target_ligand_states`.
73 This invalidates pairwise scores of such a ligand with all other ligands.
74 This and other issues in pairwise score computation are reported in
75 :attr:`state_matrix` which has the same size as :attr:`score_matrix`.
76 Only if the respective location is 0, a valid pairwise score can be
77 expected. The states and their meaning can be explored with code::
79 for state_code, (short_desc, desc) in scorer_obj.state_decoding.items():
84 A common use case is to derive a one-to-one mapping between ligands in
85 the model and the target for which :class:`LigandScorer` provides an
86 automated :attr:`assignment` procedure.
87 By default, only exact matches between target and model ligands are
88 considered. This is a problem when the target only contains a subset
89 of the expected atoms (for instance if atoms are missing in an
90 experimental structure, which often happens in the PDB). With
91 `substructure_match=True`, complete model ligands can be scored against
92 partial target ligands. One problem with this approach is that it is
93 very easy to find good matches to small, irrelevant ligands like EDO, CO2
94 or GOL. The assignment algorithm therefore considers the coverage,
95 expressed as the fraction of atoms of the model ligand atoms covered in the
96 target. Higher coverage matches are prioritized, but a match with a better
97 score will be preferred if it falls within a window of `coverage_delta`
98 (by default 0.2) of a worse-scoring match. As a result, for instance,
99 with a delta of 0.2, a low-score match with coverage 0.96 would be
100 preferred over a high-score match with coverage 0.70.
104 Unlike most of OpenStructure, this class does not assume that the ligands
105 (either for the model or the target) are part of the PDB component
106 dictionary. They may have arbitrary residue names. Residue names do not
107 have to match between the model and the target. Matching is based on
108 the calculation of isomorphisms which depend on the atom element name and
109 atom connectivity (bond order is ignored).
110 It is up to the caller to ensure that the connectivity of atoms is properly
111 set before passing any ligands to this class. Ligands with improper
112 connectivity will lead to bogus results.
114 This only applies to the ligand. The rest of the model and target
115 structures (protein, nucleic acids) must still follow the usual rules and
116 contain only residues from the compound library. Structures are cleaned up
117 according to constructor documentation. We advise to
118 use the :func:`ost.mol.alg.scoring_base.MMCIFPrep` and
119 :func:`ost.mol.alg.scoring_base.PDBPrep` for loading which already
120 clean hydrogens and, in the case of MMCIF, optionally extract ligands ready
121 to be used by the :class:`LigandScorer` based on "non-polymer" entity types.
122 In case of PDB file format, ligands must be loaded separately as SDF files.
124 Only polymers (protein and nucleic acids) of model and target are considered
125 for ligand binding sites. The
126 :class:`ost.mol.alg.chain_mapping.ChainMapper` is used to enumerate possible
127 mappings of these chains. In short: identical chains in the target are
128 grouped based on pairwise sequence identity
129 (see pep_seqid_thr/nuc_seqid_thr param). Each model chain is assigned to
130 one of these groups (see mdl_map_pep_seqid_thr/mdl_map_nuc_seqid_thr param).
131 To avoid spurious matches, only polymers of a certain length are considered
132 in this matching procedure (see min_pep_length/min_nuc_length param).
133 Shorter polymers are never mapped and do not contribute to scoring.
135 Here is an example of how to setup a scorer::
137 from ost.mol.alg.ligand_scoring_scrmsd import SCRMSDScorer
138 from ost.mol.alg.scoring_base import MMCIFPrep
139 from ost.mol.alg.scoring_base import PDBPrep
142 # Structure model in PDB format, containing the receptor only
143 model = PDBPrep("path_to_model.pdb")
144 # Ligand model as SDF file
145 model_ligand = io.LoadEntity("path_to_ligand.sdf", format="sdf")
146 # Target loaded from mmCIF, containing the ligand
147 target, target_ligands = MMCIFPrep("path_to_target.cif",
148 extract_nonpoly=True)
150 # Setup scorer object and compute SCRMSD
151 model_ligands = [model_ligand.Select("ele != H")]
152 sc = SCRMSDScorer(model, target, model_ligands, target_ligands)
154 # Perform assignment and read respective scores
155 for lig_pair in sc.assignment:
156 trg_lig = sc.target_ligands[lig_pair[0]]
157 mdl_lig = sc.model_ligands[lig_pair[1]]
158 score = sc.score_matrix[lig_pair[0], lig_pair[1]]
159 print(f"Score for {trg_lig} and {mdl_lig}: {score}")
161 # check cleanup in model and target structure:
162 print("model cleanup:", sc.model_cleanup_log)
163 print("target cleanup:", sc.target_cleanup_log)
166 :param model: Model structure - a deep copy is available as :attr:`model`.
167 The model undergoes the following cleanup steps which are
168 dependent on :class:`ost.conop.CompoundLib` returned by
169 :func:`ost.conop.GetDefaultLib`: 1) removal
170 of hydrogens, 2) removal of residues for which there is no
171 entry in :class:`ost.conop.CompoundLib`, 3) removal of
172 residues that are not peptide linking or nucleotide linking
173 according to :class:`ost.conop.CompoundLib` 4) removal of
174 atoms that are not defined for respective residues in
175 :class:`ost.conop.CompoundLib`. Except step 1), every cleanup
176 is logged with :class:`ost.LogLevel` Warning and a report is
177 available as :attr:`model_cleanup_log`.
178 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
179 :param target: Target structure - same processing as *model*.
180 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
181 :param model_ligands: Model ligands, as a list of
182 :class:`ost.mol.ResidueHandle`/
183 :class:`ost.mol.ResidueView`/
184 :class:`ost.mol.EntityHandle`/
185 :class:`ost.mol.EntityView`. For
186 :class:`ost.mol.EntityHandle`/
187 :class:`ost.mol.EntityView`, each residue is
188 considered to be an individual ligand.
189 All ligands are copied into a separate
190 :class:`ost.mol.EntityHandle` available as
191 :attr:`model_ligand_ent` and the respective
192 list of ligands is available as :attr:`model_ligands`.
193 :type model_ligands: :class:`list`
194 :param target_ligands: Target ligands, same processing as model ligands.
195 :type target_ligands: :class:`list`
196 :param resnum_alignments: Whether alignments between chemically equivalent
197 chains in *model* and *target* can be computed
198 based on residue numbers. This can be assumed in
199 benchmarking setups such as CAMEO/CASP.
200 :type resnum_alignments: :class:`bool`
201 :param substructure_match: Set this to True to allow incomplete (i.e.
202 partially resolved) target ligands.
203 :type substructure_match: :class:`bool`
204 :param coverage_delta: the coverage delta for partial ligand assignment.
205 :type coverage_delta: :class:`float`
206 :param max_symmetries: If more than that many isomorphisms exist for
207 a target-ligand pair, it will be ignored and reported
209 :type max_symmetries: :class:`int`
210 :param min_pep_length: Relevant parameter if short peptides are involved in
211 the polymer binding site. Minimum peptide length for
212 a chain to be considered in chain mapping.
213 The chain mapping algorithm first performs an all vs.
214 all pairwise sequence alignment to identify \"equal\"
215 chains within the target structure. We go for simple
216 sequence identity there. Short sequences can be
217 problematic as they may produce high sequence identity
218 alignments by pure chance.
219 :type min_pep_length: :class:`int`
220 :param min_nuc_length: Same for nucleotides
221 :type min_nuc_length: :class:`int`
222 :param pep_seqid_thr: Parameter that affects identification of identical
223 chains in target - see
224 :class:`ost.mol.alg.chain_mapping.ChainMapper`
225 :type pep_seqid_thr: :class:`float`
226 :param nuc_seqid_thr: Parameter that affects identification of identical
227 chains in target - see
228 :class:`ost.mol.alg.chain_mapping.ChainMapper`
229 :type nuc_seqid_thr: :class:`float`
230 :param mdl_map_pep_seqid_thr: Parameter that affects mapping of model chains
231 to target chains - see
232 :class:`ost.mol.alg.chain_mapping.ChainMapper`
233 :type mdl_map_pep_seqid_thr: :class:`float`
234 :param mdl_map_nuc_seqid_thr: Parameter that affects mapping of model chains
235 to target chains - see
236 :class:`ost.mol.alg.chain_mapping.ChainMapper`
237 :type mdl_map_nuc_seqid_thr: :class:`float`
238 :param seqres: Parameter that affects identification of identical chains in
239 target - see :class:`ost.mol.alg.chain_mapping.ChainMapper`
240 :type seqres: :class:`ost.seq.SequenceList`
241 :param trg_seqres_mapping: Parameter that affects identification of identical
242 chains in target - see
243 :class:`ost.mol.alg.chain_mapping.ChainMapper`
244 :type trg_seqres_mapping: :class:`dict`
247 def __init__(self, model, target, model_ligands, target_ligands,
248 resnum_alignments=False, substructure_match=False,
249 coverage_delta=0.2, max_symmetries=1e5,
250 rename_ligand_chain=False, min_pep_length = 6,
251 min_nuc_length = 4, pep_seqid_thr = 95.,
253 mdl_map_pep_seqid_thr = 0.,
254 mdl_map_nuc_seqid_thr = 0.,
256 trg_seqres_mapping = None):
259 self.
_model = mol.CreateEntityFromView(model,
False)
261 self.
_model = model.Copy()
263 raise RuntimeError(
"model must be of type EntityView/EntityHandle")
266 self.
_target = mol.CreateEntityFromView(target,
False)
270 raise RuntimeError(
"target must be of type EntityView/EntityHandle")
272 clib = conop.GetDefaultLib()
274 ost.LogError(
"A compound library is required. "
275 "Please refer to the OpenStructure website: "
276 "https://openstructure.org/docs/conop/compoundlib/.")
277 raise RuntimeError(
"No compound library found")
290 for l
in target_ligands:
294 target_ligand_ent_ed,
295 rename_ligand_chain))
298 target_ligand_ent_ed,
299 rename_ligand_chain))
301 raise RuntimeError(
"ligands must be of type EntityView/"
302 "EntityHandle/ResidueView/ResidueHandle")
305 for l
in model_ligands:
310 rename_ligand_chain))
314 rename_ligand_chain))
316 raise RuntimeError(
"ligands must be of type EntityView/"
317 "EntityHandle/ResidueView/ResidueHandle")
321 LogWarning(
"No ligands in the model")
323 raise ValueError(
"No ligand in the model and in the target")
370 iso =
"subgraph isomorphism"
372 iso =
"full graph isomorphism"
376 1: (
"identity", f
"Ligands could not be matched (by {iso})"),
377 2: (
"symmetries",
"Too many symmetries between ligand atoms were "
378 "found - increasing max_symmetries might help"),
379 3: (
"no_iso",
"No full isomorphic match could be found - enabling "
380 "substructure_match might allow a match"),
381 4: (
"disconnected",
"Ligand graph is disconnected"),
382 5: (
"stoichiometry",
"Ligand was already assigned to another ligand "
383 "(different stoichiometry)"),
384 6: (
"single_ligand_issue",
"Cannot compute valid pairwise score as "
385 "either model or target ligand have non-zero state."),
386 9: (
"unknown",
"An unknown error occured in LigandScorer")}
391 """ Model receptor structure
393 Processed according to docs in :class:`LigandScorer` constructor
399 """ Target receptor structure
401 Processed according to docs in :class:`LigandScorer` constructor
407 """ Reports residues/atoms that were removed in model during cleanup
409 Residues and atoms are described as :class:`str` in format
410 <chain_name>.<resnum>.<ins_code> (residue) and
411 <chain_name>.<resnum>.<ins_code>.<aname> (atom).
413 :class:`dict` with keys:
415 * 'cleaned_residues': another :class:`dict` with keys:
417 * 'no_clib': residues that have been removed because no entry could be
418 found in :class:`ost.conop.CompoundLib`
419 * 'not_linking': residues that have been removed because they're not
420 peptide or nucleotide linking according to
421 :class:`ost.conop.CompoundLib`
423 * 'cleaned_atoms': another :class:`dict` with keys:
425 * 'unknown_atoms': atoms that have been removed as they're not part
426 of their respective residue according to
427 :class:`ost.conop.CompoundLib`
439 """ Residues representing model ligands
441 :class:`list` of :class:`ost.mol.ResidueHandle`
447 """ Residues representing target ligands
449 :class:`list` of :class:`ost.mol.ResidueHandle`
455 """ Given at :class:`LigandScorer` construction
461 """ Given at :class:`LigandScorer` construction
467 """ Given at :class:`LigandScorer` construction
473 """ Given at :class:`LigandScorer` construction
479 """ Given at :class:`LigandScorer` construction
485 """ Given at :class:`LigandScorer` construction
491 """ Given at :class:`LigandScorer` construction
497 """ Given at :class:`LigandScorer` construction
503 """ Given at :class:`LigandScorer` construction
509 """ Given at :class:`LigandScorer` construction
515 """ Given at :class:`LigandScorer` construction
521 """ Given at :class:`LigandScorer` construction
527 """ Encodes states of ligand pairs
529 Ligand pairs can be matched and a valid score can be expected if
530 respective location in this matrix is 0.
531 Target ligands are in rows, model ligands in columns. States are encoded
532 as integers <= 9. Larger numbers encode errors for child classes.
533 Use something like ``self.state_decoding[3]`` to get a decscription.
535 :rtype: :class:`~numpy.ndarray`
543 """ Encodes states of model ligands
545 Non-zero state in any of the model ligands invalidates the full
546 respective column in :attr:`~state_matrix`.
548 :rtype: :class:`~numpy.ndarray`
556 """ Encodes states of target ligands
558 Non-zero state in any of the target ligands invalidates the full
559 respective row in :attr:`~state_matrix`.
561 :rtype: :class:`~numpy.ndarray`
569 """ Get the matrix of scores.
571 Target ligands are in rows, model ligands in columns.
573 NaN values indicate that no value could be computed (i.e. different
574 ligands). In other words: values are only valid if the respective
575 location in :attr:`~state_matrix` is 0.
577 :rtype: :class:`~numpy.ndarray`
585 """ Get the matrix of model ligand atom coverage in the target.
587 Target ligands are in rows, model ligands in columns.
589 NaN values indicate that no value could be computed (i.e. different
590 ligands). In other words: values are only valid if the respective
591 location in :attr:`~state_matrix` is 0. If `substructure_match=False`,
592 only full match isomorphisms are considered, and therefore only values
593 of 1.0 can be observed.
595 :rtype: :class:`~numpy.ndarray`
603 """ Get the matrix of scorer specific auxiliary data.
605 Target ligands are in rows, model ligands in columns.
607 Auxiliary data consists of arbitrary data dicts which allow a child
608 class to provide additional information for a scored ligand pair.
609 empty dictionaries indicate that the child class simply didn't return
610 anything or that no value could be computed (e.g. different ligands).
611 In other words: values are only valid if respective location in the
612 :attr:`~state_matrix` is 0.
614 :rtype: :class:`~numpy.ndarray`
622 """ Ligand assignment based on computed scores
624 Implements a greedy algorithm to assign target and model ligands
625 with each other. Starts from each valid ligand pair as indicated
626 by a state of 0 in :attr:`state_matrix`. Each iteration first selects
627 high coverage pairs. Given max_coverage defined as the highest
628 coverage observed in the available pairs, all pairs with coverage
629 in [max_coverage-*coverage_delta*, max_coverage] are selected.
630 The best scoring pair among those is added to the assignment
631 and the whole process is repeated until there are no ligands to
634 :rtype: :class:`list` of :class:`tuple` (trg_lig_idx, mdl_lig_idx)
651 tmp.sort(reverse=
True)
655 raise RuntimeError(
"LigandScorer._score_dir must return one in "
658 LogScript(
"Computing ligand assignment")
662 top_coverage = [x
for x
in tmp
if x[1] >= coverage_thresh]
665 a = top_coverage[0][2]
666 b = top_coverage[0][3]
670 tmp = [x
for x
in tmp
if (x[2] != a
and x[3] != b)]
676 """ Get a dictionary of score values, keyed by model ligand
678 Extract score with something like:
679 ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
680 The returned scores are based on :attr:`~assignment`.
682 :rtype: :class:`dict`
686 for (trg_lig_idx, mdl_lig_idx)
in self.
assignment:
688 cname = mdl_lig.GetChain().GetName()
689 rnum = mdl_lig.GetNumber()
698 """ Get a dictionary of score details, keyed by model ligand
700 Extract dict with something like:
701 ``scorer.score[lig.GetChain().GetName()][lig.GetNumber()]``.
702 The returned info dicts are based on :attr:`~assignment`. The content is
703 documented in the respective child class.
705 :rtype: :class:`dict`
709 for (trg_lig_idx, mdl_lig_idx)
in self.
assignment:
711 cname = mdl_lig.GetChain().GetName()
712 rnum = mdl_lig.GetNumber()
722 """ Get indices of target ligands which are not assigned
724 :rtype: :class:`list` of :class:`int`
727 assigned = set([x[0]
for x
in self.
assignment])
732 """ Get indices of model ligands which are not assigned
734 :rtype: :class:`list` of :class:`int`
737 assigned = set([x[1]
for x
in self.
assignment])
741 """ Get summary of states observed with respect to all model ligands
743 Mainly for debug purposes
745 :param trg_lig_idx: Index of target ligand for which report should be
747 :type trg_lig_idx: :class:`int`
753 """ Get summary of states observed with respect to all target ligands
755 Mainly for debug purposes
757 :param mdl_lig_idx: Index of model ligand for which report should be
759 :type mdl_lig_idx: :class:`int`
768 for s
in np.unique(pair_states):
770 indices = np.flatnonzero(pair_states == s).tolist()
771 pair_report.append({
"state": s,
772 "short desc": desc[0],
777 ligand_report = {
"state": ligand_state,
778 "short desc": desc[0],
781 return (ligand_report, pair_report)
784 """ Makes an educated guess why target ligand is not assigned
786 This either returns actual error states or custom states that are
787 derived from them. Currently, the following reasons are reported:
789 * `no_ligand`: there was no ligand in the model.
790 * `disconnected`: the ligand graph was disconnected.
791 * `identity`: the ligand was not found in the model (by graph
792 isomorphism). Check your ligand connectivity.
793 * `no_iso`: no full isomorphic match could be found. Try enabling
794 `substructure_match=True` if the target ligand is incomplete.
795 * `symmetries`: too many symmetries were found (by graph isomorphisms).
796 Try to increase `max_symmetries`.
797 * `stoichiometry`: there was a possible assignment in the model, but
798 the model ligand was already assigned to a different target ligand.
799 This indicates different stoichiometries.
800 * `no_contact` (LDDT-PLI only): There were no LDDT contacts between
801 the binding site and the ligand, and LDDT-PLI is undefined.
802 * `target_binding_site` (SCRMSD only): no polymer residues were in
803 proximity of the target ligand.
804 * `model_binding_site` (SCRMSD only): the binding site was not found
805 in the model. Either the binding site was not modeled or the model
806 ligand was positioned too far in combination with
807 `full_bs_search=False`.
809 :param trg_lig_idx: Index of target ligand
810 :type trg_lig_idx: :class:`int`
811 :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
812 sentence describing the issue, (\"unknown\",\"unknown\") if
813 nothing obvious can be found.
814 :raises: :class:`RuntimeError` if specified target ligand is assigned
817 raise RuntimeError(
"Specified target ligand is not unassigned")
821 return (
"no_ligand",
"No ligand in the model")
835 return (
"stoichiometry",
836 "Ligand was already assigned to an other "
837 "model ligand (different stoichiometry)")
846 mdl_idx = np.where(self.
state_matrix[trg_lig_idx,:]==6)[0]
849 raise RuntimeError(
"This should never happen")
856 if 6
in tmp
and len(tmp) > 1:
860 if 1
in tmp
and len(tmp) > 1:
868 """ Makes an educated guess why model ligand is not assigned
870 This either returns actual error states or custom states that are
871 derived from them. Currently, the following reasons are reported:
873 * `no_ligand`: there was no ligand in the target.
874 * `disconnected`: the ligand graph is disconnected.
875 * `identity`: the ligand was not found in the target (by graph or
876 subgraph isomorphism). Check your ligand connectivity.
877 * `no_iso`: no full isomorphic match could be found. Try enabling
878 `substructure_match=True` if the target ligand is incomplete.
879 * `symmetries`: too many symmetries were found (by graph isomorphisms).
880 Try to increase `max_symmetries`.
881 * `stoichiometry`: there was a possible assignment in the target, but
882 the model target was already assigned to a different model ligand.
883 This indicates different stoichiometries.
884 * `no_contact` (LDDT-PLI only): There were no LDDT contacts between
885 the binding site and the ligand, and LDDT-PLI is undefined.
886 * `target_binding_site` (SCRMSD only): a potential assignment was found
887 in the target, but there were no polymer residues in proximity of the
888 ligand in the target.
889 * `model_binding_site` (SCRMSD only): a potential assignment was
890 found in the target, but no binding site was found in the model.
891 Either the binding site was not modeled or the model ligand was
892 positioned too far in combination with `full_bs_search=False`.
894 :param mdl_lig_idx: Index of model ligand
895 :type mdl_lig_idx: :class:`int`
896 :returns: :class:`tuple` with two elements: 1) keyword 2) human readable
897 sentence describing the issue, (\"unknown\",\"unknown\") if
898 nothing obvious can be found.
899 :raises: :class:`RuntimeError` if specified model ligand is assigned
902 raise RuntimeError(
"Specified model ligand is not unassigned")
906 return (
"no_ligand",
"No ligand in the target")
920 return (
"stoichiometry",
921 "Ligand was already assigned to an other "
922 "model ligand (different stoichiometry)")
931 trg_idx = np.where(self.
state_matrix[:,mdl_lig_idx]==6)[0]
934 raise RuntimeError(
"This should never happen")
941 if 6
in tmp
and len(tmp) > 1:
945 if 1
in tmp
and len(tmp) > 1:
956 cname = lig.GetChain().GetName()
957 rnum = lig.GetNumber()
958 if cname
not in return_dict:
959 return_dict[cname] = dict()
960 return_dict[cname][rnum] = \
969 cname = lig.GetChain().GetName()
970 rnum = lig.GetNumber()
971 if cname
not in return_dict:
972 return_dict[cname] = dict()
973 return_dict[cname][rnum] = \
979 """ Chain mapper object for the given :attr:`target`.
981 Can be used by child classes if needed, constructed with
982 *resnum_alignments* flag
984 :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
1022 chain_mapping._GetRefMdlAlns(self.
_chain_mapper.chem_groups,
1047 Compute score for every possible target-model ligand pair and store the
1048 result in internal matrices.
1054 self.
_score_matrix = np.full(shape, np.nan, dtype=np.float32)
1067 for g_idx, g
in enumerate(target_graphs):
1068 if not networkx.is_connected(g):
1071 msg =
"Disconnected graph observed for target ligand "
1075 for g_idx, g
in enumerate(model_graphs):
1076 if not networkx.is_connected(g):
1079 msg =
"Disconnected graph observed for model ligand "
1084 LogScript(
"Computing pairwise scores for all %s x %s ligands" % shape)
1086 LogInfo(
"Analyzing target ligand %s" % target_ligand)
1094 LogInfo(
"Comparing to model ligand %s" % model_ligand)
1107 model_ligand, target_ligand,
1111 model_graph = model_graphs[model_id],
1112 target_graph = target_graphs[target_id])
1113 LogInfo(
"Ligands %s and %s symmetry match" % (
1114 str(model_ligand), str(target_ligand)))
1115 except NoSymmetryError:
1117 LogInfo(
"No symmetry between %s and %s" % (
1118 str(model_ligand), str(target_ligand)))
1121 except TooManySymmetriesError:
1123 LogWarning(
"Too many symmetries between %s and %s" % (
1124 str(model_ligand), str(target_ligand)))
1127 except NoIsomorphicSymmetryError:
1129 LogInfo(
"No isomorphic symmetry between %s and %s" % (
1130 str(model_ligand), str(target_ligand)))
1133 except DisconnectedGraphError:
1136 LogError(
"Disconnected graph observed for %s and %s" % (
1137 str(model_ligand), str(target_ligand)))
1147 score, pair_state, target_ligand_state, model_ligand_state, aux\
1148 = self.
_compute(symmetries, target_ligand, model_ligand)
1161 raise RuntimeError(f
"Subclass returned state "
1162 f
"\"{state}\" for which no "
1163 f
"description is available. Point "
1164 f
"the developer of the used scorer "
1165 f
"to this error message.")
1169 if target_ligand_state != 0
and pair_state != 6:
1170 raise RuntimeError(
"Observed non-zero target-ligand "
1171 "state which must trigger certain "
1172 "pair state. Point the developer of "
1173 "the used scorer to this error message")
1175 if model_ligand_state != 0
and pair_state != 6:
1176 raise RuntimeError(
"Observed non-zero model-ligand "
1177 "state which must trigger certain "
1178 "pair state. Point the developer of "
1179 "the used scorer to this error message")
1185 if score
is None or np.isnan(score):
1186 raise RuntimeError(
"LigandScorer returned invalid "
1187 "score despite 0 error state")
1190 cvg = len(symmetries[0][0]) / len(model_ligand.atoms)
1194 def _compute(self, symmetries, target_ligand, model_ligand):
1195 """ Compute score for specified ligand pair - defined by child class
1197 Raises :class:`NotImplementedError` if not implemented by child class.
1199 :param symmetries: Defines symmetries between *target_ligand* and
1200 *model_ligand*. Return value of
1201 :func:`ComputeSymmetries`
1202 :type symmetries: :class:`list` of :class:`tuple` with two elements
1203 each: 1) :class:`list` of atom indices in
1204 *target_ligand* 2) :class:`list` of respective atom
1205 indices in *model_ligand*
1206 :param target_ligand: The target ligand
1207 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1208 :class:`ost.mol.ResidueView`
1209 :param model_ligand: The model ligand
1210 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1211 :class:`ost.mol.ResidueView`
1213 :returns: A :class:`tuple` with three elements: 1) a score
1214 (:class:`float`) 2) state (:class:`int`).
1215 3) auxiliary data for this ligand pair (:class:`dict`).
1216 If state is 0, the score and auxiliary data will be
1217 added to :attr:`~score_matrix` and :attr:`~aux_matrix` as well
1218 as the respective value in :attr:`~coverage_matrix`.
1219 Returned score must be valid in this case (not None/NaN).
1220 Child specific non-zero states must be >= 10.
1222 raise NotImplementedError(
"_compute must be implemented by child "
1226 """ Return direction of score - defined by child class
1228 Relevant for ligand assignment. Must return a string in ['+', '-'].
1229 '+' for ascending scores, i.e. higher is better (lddt etc.)
1230 '-' for descending scores, i.e. lower is better (rmsd etc.)
1232 raise NotImplementedError(
"_score_dir must be implemented by child "
1236 """ Copies ligand into entity and returns residue handle
1238 new_chain = ent.FindChain(l.chain.name)
1239 if not new_chain.IsValid():
1240 new_chain = ed.InsertChain(l.chain.name)
1241 ed.SetChainType(new_chain, mol.ChainType.CHAINTYPE_NON_POLY)
1244 already_exists = new_chain.FindResidue(l.number).IsValid()
1246 if rename_ligand_chain:
1250 l.chain.name +
"_" + str(chain_ext)
1251 new_chain = ent.FindChain(new_chain_name)
1252 if new_chain.IsValid():
1257 ed.InsertChain(new_chain_name)
1259 LogInfo(
"Moved ligand residue %s to new chain %s" % (
1260 l.qualified_name, new_chain.name))
1263 "A residue number %s already exists in chain %s" % (
1264 l.number, l.chain.name)
1265 raise RuntimeError(msg)
1268 new_res = ed.AppendResidue(new_chain, l.name, l.number)
1270 for old_atom
in l.atoms:
1271 ed.InsertAtom(new_res, old_atom.name, old_atom.pos,
1272 element=old_atom.element, occupancy=old_atom.occupancy,
1273 b_factor=old_atom.b_factor, is_hetatm=old_atom.is_hetatom)
1275 for old_atom
in l.atoms:
1276 for old_bond
in old_atom.bonds:
1277 new_first = new_res.FindAtom(old_bond.first.name)
1278 new_second = new_res.FindAtom(old_bond.second.name)
1279 ed.Connect(new_first, new_second, old_bond.bond_order)
1280 new_res.SetIsLigand(
True)
1284 """ In principle molck light but logs LigandScorer specific warnings
1286 Only to be applied to polymer entity
1288 1) removes atoms with elements set to H or D (not logged as there is no
1290 2) removes residues with no entry in component dictionary
1291 3) removes all residues that are not peptide_liking or
1292 nucleotide_linking according component dictionary
1293 4) removes unknown atoms according to component dictionary
1297 cleanup_log = {
"cleaned_residues": {
"no_clib": [],
1299 "cleaned_atoms": {
"unknown_atoms": []}}
1302 hydrogen_sel = ent.Select(
"ele == H or ele == D")
1303 if hydrogen_sel.GetAtomCount() > 0:
1306 "ele != H and ele != D"), include_exlusive_atoms=
False)
1309 not_in_clib = list()
1310 not_linking = list()
1311 unknown_atom = list()
1313 for r
in ent.residues:
1314 comp = clib.FindCompound(r.GetName())
1316 not_in_clib.append(r)
1318 if not (comp.IsPeptideLinking()
or comp.IsNucleotideLinking()):
1319 not_linking.append(r)
1321 comp_anames = set([a.name
for a
in comp.atom_specs])
1323 if a.name
not in comp_anames:
1324 unknown_atom.append(a)
1327 if len(not_in_clib) > 0:
1328 cleanup_log[
"cleaned_residues"][
"no_clib"] = \
1330 msg = (
"Expected all residues in receptor structure to be defined in "
1331 "compound library but this is not the case. "
1332 "Please refer to the OpenStructure website if an updated "
1333 "library is sufficient to solve the problem: "
1334 "https://openstructure.org/docs/conop/compoundlib/ "
1335 "These residues will not be considered for scoring (which "
1336 "may also affect pre-processing steps such as alignments): ")
1337 msg +=
','.join(cleanup_log[
"cleaned_residues"][
"no_clib"])
1339 for r
in not_in_clib:
1340 r.SetIntProp(
"clib", 1)
1342 include_exlusive_atoms=
False)
1345 if len(not_linking) > 0:
1346 cleanup_log[
"cleaned_residues"][
"not_linking"] = \
1348 msg = (
"Expected all residues in receptor structure to be peptide "
1349 "linking or nucleotide linking according to the compound "
1350 "library but this is not the case. "
1351 "Please refer to the OpenStructure website if an updated "
1352 "library is sufficient to solve the problem: "
1353 "https://openstructure.org/docs/conop/compoundlib/ "
1354 "These residues will not be considered for scoring (which "
1355 "may also affect pre-processing steps such as alignments): ")
1356 msg +=
','.join(cleanup_log[
"cleaned_residues"][
"not_linking"])
1358 for r
in not_linking:
1359 r.SetIntProp(
"linking", 1)
1361 include_exlusive_atoms=
False)
1364 if len(unknown_atom) > 0:
1365 cleanup_log[
"cleaned_atoms"][
"unknown_atoms"] = \
1367 msg = (
"Expected atom names according to the compound library but "
1368 "this is not the case."
1369 "Please refer to the OpenStructure website if an updated "
1370 "library is sufficient to solve the problem: "
1371 "https://openstructure.org/docs/conop/compoundlib/ "
1372 "These atoms will not be considered for scoring: ")
1373 msg +=
','.join(cleanup_log[
"cleaned_atoms"][
"unknown_atoms"])
1375 for a
in unknown_atom:
1376 a.SetIntProp(
"unknown", 1)
1378 include_exlusive_atoms=
False)
1382 processor.Process(ent)
1384 return (ent, cleanup_log)
1388 """Return a NetworkX graph representation of the residue.
1390 :param residue: the residue from which to derive the graph
1391 :type residue: :class:`ost.mol.ResidueHandle` or
1392 :class:`ost.mol.ResidueView`
1393 :param by_atom_index: Set this parameter to True if you need the nodes to
1394 be labeled by atom index (within the residue).
1395 Otherwise, if False, the nodes will be labeled by
1397 :type by_atom_index: :class:`bool`
1398 :rtype: :class:`~networkx.classes.graph.Graph`
1400 Nodes are labeled with the Atom's uppercase
1401 :attr:`~ost.mol.AtomHandle.element`.
1403 nxg = networkx.Graph()
1405 for atom
in residue.atoms:
1406 nxg.add_node(atom.name, element=atom.element.upper())
1411 nxg.add_edges_from([(
1413 b.second.name)
for a
in residue.atoms
for b
in a.GetBondList()])
1416 nxg = networkx.relabel_nodes(nxg,
1417 {a: b
for a, b
in zip(
1418 [a.name
for a
in residue.atoms],
1419 range(len(residue.atoms)))},
1424 by_atom_index=False, return_symmetries=True,
1425 max_symmetries=1e6, model_graph = None,
1426 target_graph = None):
1427 """Return a list of symmetries (isomorphisms) of the model onto the target
1430 :param model_ligand: The model ligand
1431 :type model_ligand: :class:`ost.mol.ResidueHandle` or
1432 :class:`ost.mol.ResidueView`
1433 :param target_ligand: The target ligand
1434 :type target_ligand: :class:`ost.mol.ResidueHandle` or
1435 :class:`ost.mol.ResidueView`
1436 :param substructure_match: Set this to True to allow partial ligands
1438 :type substructure_match: :class:`bool`
1439 :param by_atom_index: Set this parameter to True if you need the symmetries
1440 to refer to atom index (within the residue).
1441 Otherwise, if False, the symmetries refer to atom
1443 :type by_atom_index: :class:`bool`
1444 :type return_symmetries: If Truthy, return the mappings, otherwise simply
1445 return True if a mapping is found (and raise if
1446 no mapping is found). This is useful to quickly
1447 find out if a mapping exist without the expensive
1448 step to find all the mappings.
1449 :type return_symmetries: :class:`bool`
1450 :param max_symmetries: If more than that many isomorphisms exist, raise
1451 a :class:`TooManySymmetriesError`. This can only be assessed by
1452 generating at least that many isomorphisms and can take some time.
1453 :type max_symmetries: :class:`int`
1454 :raises: :class:`NoSymmetryError` when no symmetry can be found;
1455 :class:`NoIsomorphicSymmetryError` in case of isomorphic
1456 subgraph but *substructure_match* is False;
1457 :class:`TooManySymmetriesError` when more than `max_symmetries`
1458 isomorphisms are found; :class:`DisconnectedGraphError` if
1459 graph for *model_ligand*/*target_ligand* is disconnected.
1463 if model_graph
is None:
1465 by_atom_index=by_atom_index)
1467 if not networkx.is_connected(model_graph):
1468 msg =
"Disconnected graph for model ligand %s" % model_ligand
1472 if target_graph
is None:
1474 by_atom_index=by_atom_index)
1476 if not networkx.is_connected(target_graph):
1477 msg =
"Disconnected graph for target ligand %s" % target_ligand
1484 gm = networkx.algorithms.isomorphism.GraphMatcher(
1485 model_graph, target_graph, node_match=
lambda x, y:
1486 x[
"element"] == y[
"element"])
1487 if gm.is_isomorphic():
1488 if not return_symmetries:
1491 for i, isomorphism
in enumerate(gm.isomorphisms_iter()):
1492 if i >= max_symmetries:
1494 "Too many symmetries between %s and %s" % (
1495 str(model_ligand), str(target_ligand)))
1496 symmetries.append((list(isomorphism.values()),
1497 list(isomorphism.keys())))
1498 assert len(symmetries) > 0
1499 LogVerbose(
"Found %s isomorphic mappings (symmetries)" % len(symmetries))
1500 elif gm.subgraph_is_isomorphic()
and substructure_match:
1501 if not return_symmetries:
1504 for i, isomorphism
in enumerate(gm.subgraph_isomorphisms_iter()):
1505 if i >= max_symmetries:
1507 "Too many symmetries between %s and %s" % (
1508 str(model_ligand), str(target_ligand)))
1509 symmetries.append((list(isomorphism.values()),
1510 list(isomorphism.keys())))
1511 assert len(symmetries) > 0
1513 assert len(symmetries[0][0]) == len(target_ligand.atoms)
1514 n_sym = len(symmetries)
1515 LogVerbose(
"Found %s subgraph isomorphisms (symmetries)" % n_sym)
1516 elif gm.subgraph_is_isomorphic():
1517 LogVerbose(
"Found subgraph isomorphisms (symmetries), but"
1518 " ignoring because substructure_match=False")
1520 str(model_ligand), str(target_ligand)))
1522 LogVerbose(
"Found no isomorphic mappings (symmetries)")
1524 str(model_ligand), str(target_ligand)))
1529 """ Exception raised when no symmetry can be found.
1533class NoIsomorphicSymmetryError(ValueError):
1534 """ Exception raised when no isomorphic symmetry can be found
1536 There would be isomorphic subgraphs for which symmetries can
1537 be found, but substructure_match is disabled
1542 """ Exception raised when too many symmetries are found.
1547 """ Exception raised when the ligand graph is disconnected.
1552__all__ = (
'LigandScorer',
'ComputeSymmetries',
'NoSymmetryError',
1553 'NoIsomorphicSymmetryError',
'TooManySymmetriesError',
1554 'DisconnectedGraphError')
get_target_ligand_state_report(self, trg_lig_idx)
__mdl_chains_without_chem_mapping
mdl_map_nuc_seqid_thr(self)
unassigned_target_ligands_reasons(self)
_copy_ligand(self, l, ent, ed, rename_ligand_chain)
__init__(self, model, target, model_ligands, target_ligands, resnum_alignments=False, substructure_match=False, coverage_delta=0.2, max_symmetries=1e5, rename_ligand_chain=False, min_pep_length=6, min_nuc_length=4, pep_seqid_thr=95., nuc_seqid_thr=95., mdl_map_pep_seqid_thr=0., mdl_map_nuc_seqid_thr=0., seqres=None, trg_seqres_mapping=None)
unassigned_model_ligands_reasons(self)
_compute(self, symmetries, target_ligand, model_ligand)
model_ligand_states(self)
get_model_ligand_state_report(self, mdl_lig_idx)
_mdl_chains_without_chem_mapping(self)
_get_report(self, ligand_state, pair_states)
unassigned_model_ligands(self)
target_ligand_states(self)
guess_model_ligand_unassigned_reason(self, mdl_lig_idx)
guess_target_ligand_unassigned_reason(self, trg_lig_idx)
mdl_map_pep_seqid_thr(self)
unassigned_target_ligands(self)
_cleanup_polymer_ent(self, ent, clib)
_ResidueToGraph(residue, by_atom_index=False)
_QualifiedAtomNotation(a)
_QualifiedResidueNotation(r)
ComputeSymmetries(model_ligand, target_ligand, substructure_match=False, by_atom_index=False, return_symmetries=True, max_symmetries=1e6, model_graph=None, target_graph=None)
EntityHandle DLLEXPORT_OST_MOL CreateEntityFromView(const EntityView &view, bool include_exlusive_atoms, EntityHandle handle=EntityHandle())
create new entity handle from entity view