6 from ost
import settings
8 from ost
import LogScript, LogInfo, LogDebug
20 from ost
import bindings
26 """Scorer specific for a reference/model pair
28 Finds best possible binding site representation of reference in model given
29 lDDT score. Uses :class:`ost.mol.alg.chain_mapping.ChainMapper` to deal with
32 :param reference: Reference structure
33 :type reference: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
34 :param model: Model structure
35 :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
36 :param residue_number_alignment: Passed to ChainMapper constructor
37 :type residue_number_alignment: :class:`bool`
40 residue_number_alignment=False):
42 resnum_alignments=residue_number_alignment)
46 def ScoreBS(self, ligand, radius = 4.0, lddt_radius=10.0):
47 """Computes binding site lDDT score given *ligand*. Best possible
48 binding site representation is selected by lDDT but other scores such as
49 CA based RMSD and GDT are computed too and returned.
51 :param ligand: Defines the scored binding site, i.e. provides positions
52 to perform proximity search
53 :type ligand: r'((Residue)|(Chain)|(Entity))((View)|(Handle))'
54 :param radius: Reference residues with any atom position within *radius*
55 of *ligand* consitute the scored binding site
56 :type radius: :class:`float`
57 :param lddt_radius: Passed as *inclusion_radius* to
58 :class:`ost.mol.alg.lddt.lDDTScorer`
59 :type lddt_radius: :class:`float`
60 :returns: Object of type :class:`ost.mol.alg.chain_mapping.ReprResult`
61 containing all atom lDDT score and mapping information.
62 None if no representation could be found.
66 ref_residues_hashes = set()
67 for ligand_at
in ligand.atoms:
68 close_atoms = self.
refref.FindWithin(ligand_at.GetPos(), radius)
69 for close_at
in close_atoms:
70 ref_res = close_at.GetResidue()
71 h = ref_res.handle.GetHashCode()
72 if h
not in ref_residues_hashes:
73 ref_residues_hashes.add(h)
78 ref_bs = self.
refref.CreateEmptyView()
79 for ch
in self.
refref.chains:
81 if r.handle.GetHashCode()
in ref_residues_hashes:
82 ref_bs.AddResidue(r, mol.ViewAddFlag.INCLUDE_ALL)
86 inclusion_radius = lddt_radius)
94 """ Helper class to access the various scores available from ost.mol.alg
96 Deals with structure cleanup, chain mapping, interface identification etc.
97 Intermediate results are available as attributes.
99 :param model: Model structure - a deep copy is available as :attr:`model`.
100 Additionally, :func:`ost.mol.alg.Molck` using *molck_settings*
102 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
103 :param target: Target structure - a deep copy is available as :attr:`target`.
104 Additionally, :func:`ost.mol.alg.Molck` using *molck_settings*
106 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
107 :param resnum_alignments: Whether alignments between chemically equivalent
108 chains in *model* and *target* can be computed
109 based on residue numbers. This can be assumed in
110 benchmarking setups such as CAMEO/CASP.
111 :type resnum_alignments: :class:`bool`
112 :param molck_settings: Settings used for Molck on *model* and *target*, if
113 set to None, a default object is constructed by
114 setting everything except rm_zero_occ_atoms and
116 :class:`ost.mol.alg.MolckSettings` constructor.
117 :type molck_settings: :class:`ost.mol.alg.MolckSettings`
118 :param cad_score_exec: Explicit path to voronota-cadscore executable from
119 voronota installation from
120 https://github.com/kliment-olechnovic/voronota. If
121 not given, voronota-cadscore must be in PATH if any
122 of the CAD score related attributes is requested.
123 :type cad_score_exec: :class:`str`
124 :param custom_mapping: Provide custom chain mapping between *model* and
125 *target*. Dictionary with target chain names as key
126 and model chain names as value.
127 :attr:`~mapping` is constructed from this.
128 :type custom_mapping: :class:`dict`
129 :param custom_rigid_mapping: Provide custom chain mapping between *model*
130 and *target*. Dictionary with target chain
131 names as key and model chain names as value.
132 :attr:`~rigid_mapping` is constructed from
134 :type custom_rigid_mapping: :class:`dict`
135 :param usalign_exec: Explicit path to USalign executable used to compute
136 TM-score. If not given, TM-score will be computed
137 with OpenStructure internal copy of USalign code.
138 :type usalign_exec: :class:`str`
139 :param lddt_no_stereochecks: Whether to compute lDDT without stereochemistry
141 :type lddt_no_stereochecks: :class:`bool`
142 :param n_max_naive: Parameter for chain mapping. If the number of possible
143 mappings is <= *n_max_naive*, the full
144 mapping solution space is enumerated to find the
145 the optimum. A heuristic is used otherwise. The default
146 of 40320 corresponds to an octamer (8! = 40320).
147 A structure with stoichiometry A6B2 would be
149 :type n_max_naive: :class:`int`
150 :param oum: Override USalign Mapping. Inject rigid_mapping of
151 :class:`Scorer` object into USalign to compute TM-score.
152 Experimental feature with limitations.
153 :type oum: :class:`bool`
154 :param min_pep_length: Relevant parameter if short peptides are involved in
155 scoring. Minimum peptide length for a chain in the
156 target structure to be considered in chain mapping.
157 The chain mapping algorithm first performs an all vs.
158 all pairwise sequence alignment to identify \"equal\"
159 chains within the target structure. We go for simple
160 sequence identity there. Short sequences can be
161 problematic as they may produce high sequence identity
162 alignments by pure chance.
163 :type min_pep_length: :class:`int`
164 :param min_nuc_length: Relevant parameter if short nucleotides are involved
165 in scoring. Minimum nucleotide length for a chain in
166 the target structure to be considered in chain
167 mapping. The chain mapping algorithm first performs
168 an all vs. all pairwise sequence alignment to
169 identify \"equal\" chains within the target
170 structure. We go for simple sequence identity there.
171 Short sequences can be problematic as they may
172 produce high sequence identity alignments by pure
174 :type min_nuc_length: :class:`int`
175 :param lddt_add_mdl_contacts: lDDT specific flag. Only using contacts in
176 lDDT that are within a certain distance
177 threshold in the target does not penalize
178 for added model contacts. If set to True, this
179 flag will also consider target contacts
180 that are within the specified distance
181 threshold in the model but not necessarily in
182 the target. No contact will be added if the
183 respective atom pair is not resolved in the
185 :type lddt_add_mdl_contacts: :class:`bool`
186 :param dockq_capri_peptide: Flag that changes two things in the way DockQ
187 and its underlying scores are computed which is
188 proposed by the CAPRI community when scoring
189 peptides (PMID: 31886916).
190 ONE: Two residues are considered in contact if
191 any of their atoms is within 5A. This is
192 relevant for fnat and fnonat scores. CAPRI
193 suggests to lower this threshold to 4A for
194 protein-peptide interactions.
195 TWO: irmsd is computed on interface residues. A
196 residue is defined as interface residue if any
197 of its atoms is within 10A of another chain.
198 CAPRI suggests to lower the default of 10A to
199 8A in combination with only considering CB atoms
200 for protein-peptide interactions.
201 This flag has no influence on patch_dockq
203 :type dockq_capri_peptide: :class:`bool`
205 def __init__(self, model, target, resnum_alignments=False,
206 molck_settings = None, cad_score_exec = None,
207 custom_mapping=None, custom_rigid_mapping=None,
208 usalign_exec = None, lddt_no_stereochecks=False,
209 n_max_naive=40320, oum=False, min_pep_length = 6,
210 min_nuc_length = 4, lddt_add_mdl_contacts=False,
211 dockq_capri_peptide=False):
226 if molck_settings
is None:
231 rm_zero_occ_atoms=
False,
235 LogScript(
"Cleaning up input structures")
236 Molck(self.
_model_model, conop.GetDefaultLib(), molck_settings)
237 Molck(self.
_target_target, conop.GetDefaultLib(), molck_settings)
238 self.
_model_model = self.
_model_model.Select(
"peptide=True or nucleotide=True")
239 self.
_target_target = self.
_target_target.Select(
"peptide=True or nucleotide=True")
242 for ch
in self.
_model_model.chains:
243 if ch.GetName().strip() ==
"":
244 raise RuntimeError(
"Model chains must have valid chain names")
245 if ch.GetName().strip() ==
"'" or ch.GetName().strip() ==
'"':
246 raise RuntimeError(
"Model chains cannot be named with quote signs (' or \"\")")
249 for ch
in self.
_target_target.chains:
250 if ch.GetName().strip() ==
"":
251 raise RuntimeError(
"Target chains must have valid chain names")
252 if ch.GetName().strip() ==
"'" or ch.GetName().strip() ==
'"':
253 raise RuntimeError(
"Target chains cannot be named with quote signs (' or \"\")")
255 if resnum_alignments:
259 for ch
in self.
_model_model.chains:
260 ins_codes = [r.GetNumber().GetInsCode()
for r
in ch.residues]
261 if len(set(ins_codes)) != 1
or ins_codes[0] !=
'\0':
262 raise RuntimeError(
"Residue numbers in each model chain "
263 "must not contain insertion codes if "
264 "resnum_alignments are enabled")
265 nums = [r.GetNumber().GetNum()
for r
in ch.residues]
266 if not all(i < j
for i, j
in zip(nums, nums[1:])):
267 raise RuntimeError(
"Residue numbers in each model chain "
268 "must be strictly increasing if "
269 "resnum_alignments are enabled")
271 for ch
in self.
_target_target.chains:
272 ins_codes = [r.GetNumber().GetInsCode()
for r
in ch.residues]
273 if len(set(ins_codes)) != 1
or ins_codes[0] !=
'\0':
274 raise RuntimeError(
"Residue numbers in each target chain "
275 "must not contain insertion codes if "
276 "resnum_alignments are enabled")
277 nums = [r.GetNumber().GetNum()
for r
in ch.residues]
278 if not all(i < j
for i, j
in zip(nums, nums[1:])):
279 raise RuntimeError(
"Residue numbers in each target chain "
280 "must be strictly increasing if "
281 "resnum_alignments are enabled")
283 if usalign_exec
is not None:
284 if not os.path.exists(usalign_exec):
285 raise RuntimeError(f
"USalign exec ({usalign_exec}) "
287 if not os.access(usalign_exec, os.X_OK):
288 raise RuntimeError(f
"USalign exec ({usalign_exec}) "
289 f
"is not executable")
327 self.
_lddt_lddt =
None
360 self.
_fnat_fnat =
None
364 self.
_nnat_nnat =
None
365 self.
_nmdl_nmdl =
None
391 self.
_rmsd_rmsd =
None
402 if custom_mapping
is not None:
405 if custom_rigid_mapping
is not None:
408 LogDebug(
"Scorer sucessfully initialized")
412 """ Model with Molck cleanup
414 :type: :class:`ost.mol.EntityHandle`
420 """ The original model passed at object construction
422 :type: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
428 """ Target with Molck cleanup
430 :type: :class:`ost.mol.EntityHandle`
436 """ The original target passed at object construction
438 :type: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
444 """ Alignments of :attr:`model`/:attr:`target` chains
446 Alignments for each pair of chains mapped in :attr:`mapping`.
447 First sequence is target sequence, second sequence the model sequence.
449 :type: :class:`list` of :class:`ost.seq.AlignmentHandle`
451 if self.
_aln_aln
is None:
457 """ Stereochecked equivalent of :attr:`aln`
459 The alignments may differ, as stereochecks potentially remove residues
461 :type: :class:`list` of :class:`ost.seq.AlignmentHandle`
469 """ Alignments of :attr:`model_orig`/:attr:`target_orig` chains
471 Selects for peptide and nucleotide residues before sequence
472 extraction. Includes residues that would be removed by molck in
473 structure preprocessing.
475 :type: :class:`list` of :class:`ost.seq.AlignmentHandle`
483 """ View of :attr:`~model` that has stereochemistry checks applied
485 First, a selection for peptide/nucleotide residues is performed,
486 secondly peptide sidechains with stereochemical irregularities are
487 removed (full residue if backbone atoms are involved). Irregularities
488 are clashes or bond lengths/angles more than 12 standard deviations
489 from expected values.
491 :type: :class:`ost.mol.EntityView`
499 """ Clashing model atoms
501 :type: :class:`list` of :class:`ost.mol.alg.stereochemistry.ClashInfo`
509 """ Model bonds with unexpected stereochemistry
511 :type: :class:`list` of
512 :class:`ost.mol.alg.stereochemistry.BondViolationInfo`
520 """ Model angles with unexpected stereochemistry
522 :type: :class:`list` of
523 :class:`ost.mol.alg.stereochemistry.AngleViolationInfo`
531 """ Same as :attr:`~stereochecked_model` for :attr:`~target`
533 :type: :class:`ost.mol.EntityView`
541 """ Clashing target atoms
543 :type: :class:`list` of :class:`ost.mol.alg.stereochemistry.ClashInfo`
551 """ Target bonds with unexpected stereochemistry
553 :type: :class:`list` of
554 :class:`ost.mol.alg.stereochemistry.BondViolationInfo`
562 """ Target angles with unexpected stereochemistry
564 :type: :class:`list` of
565 :class:`ost.mol.alg.stereochemistry.AngleViolationInfo`
573 """ Chain mapper object for given :attr:`target`
575 :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
587 """ Full chain mapping result for :attr:`target`/:attr:`model`
589 Computed with :func:`ost.mol.alg.ChainMapper.GetMapping`
591 :type: :class:`ost.mol.alg.chain_mapping.MappingResult`
594 LogScript(
"Computing chain mapping")
602 """ Full chain mapping result for :attr:`target`/:attr:`model`
604 Computed with :func:`ost.mol.alg.ChainMapper.GetRMSDMapping`
606 :type: :class:`ost.mol.alg.chain_mapping.MappingResult`
609 LogScript(
"Computing rigid chain mapping")
616 """ Interface residues in :attr:`~model`
618 Thats all residues having a contact with at least one residue from
619 another chain (CB-CB distance <= 8A, CA in case of Glycine)
621 :type: :class:`dict` with chain names as key and and :class:`list`
622 with residue numbers of the respective interface residues.
631 """ Same as :attr:`~model_interface_residues` for :attr:`~target`
633 :type: :class:`dict` with chain names as key and and :class:`list`
634 with residue numbers of the respective interface residues.
643 """ lDDT scorer for :attr:`~stereochecked_target` (default parameters)
645 :type: :class:`ost.mol.alg.lddt.lDDTScorer`
656 """ Backbone only lDDT scorer for :attr:`~target`
658 No stereochecks applied for bb only lDDT which considers CA atoms
659 for peptides and C3' atoms for nucleotides.
661 :type: :class:`ost.mol.alg.lddt.lDDTScorer`
669 """ QS scorer constructed from :attr:`~mapping`
671 The scorer object is constructed with default parameters and relates to
672 :attr:`~model` and :attr:`~target` (no stereochecks).
674 :type: :class:`ost.mol.alg.qsscore.QSScorer`
689 """ Global lDDT score in range [0.0, 1.0]
691 Computed based on :attr:`~stereochecked_model`. In case of oligomers,
692 :attr:`~mapping` is used.
694 :type: :class:`float`
696 if self.
_lddt_lddt
is None:
698 return self.
_lddt_lddt
702 """ Per residue lDDT scores in range [0.0, 1.0]
704 Computed based on :attr:`~stereochecked_model` but scores for all
705 residues in :attr:`~model` are reported. If a residue has been removed
706 by stereochemistry checks, the respective score is set to 0.0. If a
707 residue is not covered by the target or is in a chain skipped by the
708 chain mapping procedure (happens for super short chains), the respective
709 score is set to None. In case of oligomers, :attr:`~mapping` is used.
719 """ Backbone only global lDDT score in range [0.0, 1.0]
721 Computed based on :attr:`~model` on backbone atoms only. This is CA for
722 peptides and C3' for nucleotides. No stereochecks are performed. In case
723 of oligomers, :attr:`~mapping` is used.
725 :type: :class:`float`
733 """ Backbone only per residue lDDT scores in range [0.0, 1.0]
735 Computed based on :attr:`~model` on backbone atoms only. This is CA for
736 peptides and C3' for nucleotides. No stereochecks are performed. If a
737 residue is not covered by the target or is in a chain skipped by the
738 chain mapping procedure (happens for super short chains), the respective
739 score is set to None. In case of oligomers, :attr:`~mapping` is used.
749 """ Global interface lDDT score in range [0.0, 1.0]
751 This is lDDT only based on inter-chain contacts. Value is None if no
752 such contacts are present. For example if we're dealing with a monomer.
753 Computed based on :attr:`~stereochecked_model` and :attr:`~mapping` for
756 :type: :class:`float`
758 if self.
_ilddt_ilddt
is None:
769 Computed based on :attr:`model` using :attr:`mapping`
771 :type: :class:`float`
779 """ Global QS-score - only computed on aligned residues
781 Computed based on :attr:`model` using :attr:`mapping`. The QS-score
782 computation only considers contacts between residues with a mapping
783 between target and model. As a result, the score won't be lowered in
784 case of additional chains/residues in any of the structures.
786 :type: :class:`float`
794 """ Interfaces in :attr:`~target` with non-zero contribution to
795 :attr:`~qs_global`/:attr:`~qs_best`
797 Chain names are lexicographically sorted.
799 :type: :class:`list` of :class:`tuple` with 2 elements each:
810 """ Interfaces in :attr:`~model` with non-zero contribution to
811 :attr:`~qs_global`/:attr:`~qs_best`
813 Chain names are lexicographically sorted.
815 :type: :class:`list` of :class:`tuple` with 2 elements each:
827 """ Interfaces in :attr:`~qs_target_interfaces` that can be mapped
830 Target chain names are lexicographically sorted.
832 :type: :class:`list` of :class:`tuple` with 4 elements each:
833 (trg_ch1, trg_ch2, mdl_ch1, mdl_ch2)
837 flat_mapping = self.
mappingmapping.GetFlatMapping()
839 if i[0]
in flat_mapping
and i[1]
in flat_mapping:
847 """ QS-score for each interface in :attr:`qs_interfaces`
849 :type: :class:`list` of :class:`float`
857 """ QS-score for each interface in :attr:`qs_interfaces`
859 Only computed on aligned residues
861 :type: :class:`list` of :class:`float`
871 A contact is a pair or residues from distinct chains that have
872 a minimal heavy atom distance < 5A. Contacts are specified as
873 :class:`tuple` with two strings in format:
874 <cname>.<rnum>.<ins_code>
876 :type: :class:`list` of :class:`tuple`
892 """ Interfaces in :class:`target` which have at least one contact
894 Contact as defined in :attr:`~native_contacts`,
895 chain names are lexicographically sorted.
897 :type: :class:`list` of :class:`tuple` with 2 elements each
902 tmp = [(min(x[0],x[1]), max(x[0],x[1]))
for x
in tmp]
908 """ Interfaces in :class:`model` which have at least one contact
910 Contact as defined in :attr:`~native_contacts`,
911 chain names are lexicographically sorted.
913 :type: :class:`list` of :class:`tuple` with 2 elements each
918 tmp = [(min(x[0],x[1]), max(x[0],x[1]))
for x
in tmp]
924 """ Fraction of model contacts that are also present in target
926 :type: :class:`float`
934 """ Fraction of target contacts that are correctly reproduced in model
936 :type: :class:`float`
944 """ ICS (Interface Contact Similarity) score
946 Combination of :attr:`~ics_precision` and :attr:`~ics_recall`
949 :type: :class:`float`
951 if self.
_ics_ics
is None:
957 """ Per-interface ICS precision
959 :attr:`~ics_precision` for each interface in
960 :attr:`~contact_target_interfaces`
962 :type: :class:`list` of :class:`float`
971 """ Per-interface ICS recall
973 :attr:`~ics_recall` for each interface in
974 :attr:`~contact_target_interfaces`
976 :type: :class:`list` of :class:`float`
984 """ Per-interface ICS (Interface Contact Similarity) score
986 :attr:`~ics` for each interface in
987 :attr:`~contact_target_interfaces`
989 :type: :class:`float`
999 """ Fraction of model interface residues that are also interface
1002 :type: :class:`float`
1010 """ Fraction of target interface residues that are also interface
1013 :type: :class:`float`
1021 """ IPS (Interface Patch Similarity) score
1023 Jaccard coefficient of interface residues in target and their mapped
1024 counterparts in model
1026 :type: :class:`float`
1028 if self.
_ips_ips
is None:
1030 return self.
_ips_ips
1034 """ Per-interface IPS precision
1036 :attr:`~ips_precision` for each interface in
1037 :attr:`~contact_target_interfaces`
1039 :type: :class:`list` of :class:`float`
1048 """ Per-interface IPS recall
1050 :attr:`~ips_recall` for each interface in
1051 :attr:`~contact_target_interfaces`
1053 :type: :class:`list` of :class:`float`
1061 """ Per-interface IPS (Interface Patch Similarity) score
1063 :attr:`~ips` for each interface in
1064 :attr:`~contact_target_interfaces`
1066 :type: :class:`list` of :class:`float`
1075 """ Interfaces in :attr:`target` that are relevant for DockQ
1077 All interfaces in :attr:`~target` with non-zero contacts that are
1078 relevant for DockQ. Chain names are lexicographically sorted.
1080 :type: :class:`list` of :class:`tuple` with 2 elements each:
1090 contact_d = contact_d)
1093 interfaces = cent.interacting_chains
1094 interfaces = [(min(x[0],x[1]), max(x[0],x[1]))
for x
in interfaces]
1097 pep_seqs = set([s.GetName()
for s
in self.
chain_mapperchain_mapper.polypep_seqs])
1099 for interface
in interfaces:
1100 if interface[0]
in pep_seqs
and interface[1]
in pep_seqs:
1107 """ Interfaces in :attr:`dockq_target_interfaces` that can be mapped
1110 Target chain names are lexicographically sorted
1112 :type: :class:`list` of :class:`tuple` with 4 elements each:
1113 (trg_ch1, trg_ch2, mdl_ch1, mdl_ch2)
1117 flat_mapping = self.
mappingmapping.GetFlatMapping()
1119 if i[0]
in flat_mapping
and i[1]
in flat_mapping:
1122 flat_mapping[i[1]]))
1127 """ DockQ scores for interfaces in :attr:`~dockq_interfaces`
1129 :class:`list` of :class:`float`
1137 """ fnat scores for interfaces in :attr:`~dockq_interfaces`
1139 fnat: Fraction of native contacts that are also present in model
1141 :class:`list` of :class:`float`
1143 if self.
_fnat_fnat
is None:
1145 return self.
_fnat_fnat
1149 """ N native contacts for interfaces in :attr:`~dockq_interfaces`
1151 :class:`list` of :class:`int`
1153 if self.
_nnat_nnat
is None:
1155 return self.
_nnat_nnat
1159 """ N model contacts for interfaces in :attr:`~dockq_interfaces`
1161 :class:`list` of :class:`int`
1163 if self.
_nmdl_nmdl
is None:
1165 return self.
_nmdl_nmdl
1169 """ fnonnat scores for interfaces in :attr:`~dockq_interfaces`
1171 fnat: Fraction of model contacts that are not present in target
1173 :class:`list` of :class:`float`
1181 """ irmsd scores for interfaces in :attr:`~dockq_interfaces`
1183 irmsd: RMSD of interface (RMSD computed on N, CA, C, O atoms) which
1184 consists of each residue that has at least one heavy atom within 10A of
1187 :class:`list` of :class:`float`
1189 if self.
_irmsd_irmsd
is None:
1195 """ lrmsd scores for interfaces in :attr:`~dockq_interfaces`
1197 lrmsd: The interfaces are superposed based on the receptor (rigid
1198 min RMSD superposition) and RMSD for the ligand is reported.
1199 Superposition and RMSD are based on N, CA, C and O positions,
1200 receptor is the chain contributing to the interface with more
1203 :class:`list` of :class:`float`
1205 if self.
_lrmsd_lrmsd
is None:
1211 """ Average of DockQ scores in :attr:`dockq_scores`
1213 In its original implementation, DockQ only operates on single
1214 interfaces. Thus the requirement to combine scores for higher order
1217 :type: :class:`float`
1225 """ Same as :attr:`dockq_ave`, weighted by native contacts
1227 :type: :class:`float`
1235 """ Same as :attr:`~dockq_ave` but penalizing for missing interfaces
1237 Interfaces that are not covered in model are added as 0.0
1238 in average computation.
1240 :type: :class:`float`
1248 """ Same as :attr:`~dockq_ave_full`, but weighted
1250 Interfaces that are not covered in model are added as 0.0 in
1251 average computations and the respective weights are derived from
1252 number of contacts in respective target interface.
1260 """ Mapped representative positions in target
1262 Thats CA positions for peptide residues and C3' positions for
1263 nucleotides. Has same length as :attr:`~mapped_model_pos` and mapping
1264 is based on :attr:`~mapping`.
1266 :type: :class:`ost.geom.Vec3List`
1274 """ Mapped representative positions in model
1276 Thats CA positions for peptide residues and C3' positions for
1277 nucleotides. Has same length as :attr:`~mapped_target_pos` and mapping
1278 is based on :attr:`~mapping`.
1280 :type: :class:`ost.geom.Vec3List`
1288 """ :attr:`~mapped_model_pos` with :attr:`~transform` applied
1290 :type: :class:`ost.geom.Vec3List`
1300 """ Number of target residues which have no mapping to model
1310 """ Transform: :attr:`~mapped_model_pos` onto :attr:`~mapped_target_pos`
1312 Computed using Kabsch minimal rmsd algorithm
1314 :type: :class:`ost.geom.Mat4`
1320 self.
_transform_transform = res.transformation
1327 """ Mapped representative positions in target
1329 Thats CA positions for peptide residues and C3' positions for
1330 nucleotides. Has same length as :attr:`~rigid_mapped_model_pos` and mapping
1331 is based on :attr:`~rigid_mapping`.
1333 :type: :class:`ost.geom.Vec3List`
1341 """ Mapped representative positions in model
1343 Thats CA positions for peptide residues and C3' positions for
1344 nucleotides. Has same length as :attr:`~mapped_target_pos` and mapping
1345 is based on :attr:`~rigid_mapping`.
1347 :type: :class:`ost.geom.Vec3List`
1355 """ :attr:`~rigid_mapped_model_pos` with :attr:`~rigid_transform` applied
1357 :type: :class:`ost.geom.Vec3List`
1367 """ Number of target residues which have no rigid mapping to model
1377 """ Transform: :attr:`~rigid_mapped_model_pos` onto :attr:`~rigid_mapped_target_pos`
1379 Computed using Kabsch minimal rmsd algorithm
1381 :type: :class:`ost.geom.Mat4`
1394 """ Fraction CA (C3' for nucleotides) that can be superposed within 0.5A
1396 Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
1397 Similar iterative algorithm as LGA tool
1399 :type: :class:`float`
1401 if self.
_gdt_05_gdt_05
is None:
1405 self.
_gdt_05_gdt_05 = float(n) / n_full
1412 """ Fraction CA (C3' for nucleotides) that can be superposed within 1.0A
1414 Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
1415 Similar iterative algorithm as LGA tool
1417 :type: :class:`float`
1419 if self.
_gdt_1_gdt_1
is None:
1423 self.
_gdt_1_gdt_1 = float(n) / n_full
1430 """ Fraction CA (C3' for nucleotides) that can be superposed within 2.0A
1432 Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
1433 Similar iterative algorithm as LGA tool
1436 :type: :class:`float`
1438 if self.
_gdt_2_gdt_2
is None:
1442 self.
_gdt_2_gdt_2 = float(n) / n_full
1449 """ Fraction CA (C3' for nucleotides) that can be superposed within 4.0A
1451 Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
1452 Similar iterative algorithm as LGA tool
1454 :type: :class:`float`
1456 if self.
_gdt_4_gdt_4
is None:
1460 self.
_gdt_4_gdt_4 = float(n) / n_full
1467 """ Fraction CA (C3' for nucleotides) that can be superposed within 8.0A
1469 Similar iterative algorithm as LGA tool
1471 :type: :class:`float`
1473 if self.
_gdt_8_gdt_8
is None:
1477 self.
_gdt_8_gdt_8 = float(n) / n_full
1485 """ avg GDT with thresholds: 8.0A, 4.0A, 2.0A and 1.0A
1487 :type: :class:`float`
1489 if self.
_gdtts_gdtts
is None:
1490 LogScript(
"Computing GDT-TS score")
1496 """ avg GDT with thresholds: 4.0A, 2.0A, 1.0A and 0.5A
1498 :type: :class:`float`
1500 if self.
_gdtha_gdtha
is None:
1501 LogScript(
"Computing GDT-HA score")
1509 Computed on :attr:`~rigid_transformed_mapped_model_pos` and
1510 :attr:`rigid_mapped_target_pos`
1512 :type: :class:`float`
1514 if self.
_rmsd_rmsd
is None:
1515 LogScript(
"Computing RMSD")
1518 return self.
_rmsd_rmsd
1522 """ The global CAD atom-atom (AA) score
1524 Computed based on :attr:`~model`. In case of oligomers, :attr:`~mapping`
1527 :type: :class:`float`
1535 """ The per-residue CAD atom-atom (AA) scores
1537 Computed based on :attr:`~model`. In case of oligomers, :attr:`~mapping`
1540 :type: :class:`dict`
1548 """ Patch QS-scores for each residue in :attr:`model_interface_residues`
1550 Representative patches for each residue r in chain c are computed as
1553 * mdl_patch_one: All residues in c with CB (CA for GLY) positions within
1554 8A of r and within 12A of residues from any other chain.
1555 * mdl_patch_two: Closest residue x to r in any other chain gets
1556 identified. Patch is then constructed by selecting all residues from
1557 any other chain within 8A of x and within 12A from any residue in c.
1558 * trg_patch_one: Chain name and residue number based mapping from
1560 * trg_patch_two: Chain name and residue number based mapping from
1563 Results are stored in the same manner as
1564 :attr:`model_interface_residues`, with corresponding scores instead of
1565 residue numbers. Scores for residues which are not
1566 :class:`mol.ChemType.AMINOACIDS` are set to None. Additionally,
1567 interface patches are derived from :attr:`model`. If they contain
1568 residues which are not covered by :attr:`target`, the score is set to
1571 :type: :class:`dict` with chain names as key and and :class:`list`
1572 with scores of the respective interface residues.
1580 """ Same as :attr:`patch_qs` but for DockQ scores
1588 """ TM-score computed with USalign
1590 USalign executable can be specified with usalign_exec kwarg at Scorer
1591 construction, an OpenStructure internal copy of the USalign code is
1594 :type: :class:`float`
1602 """ Mapping computed with USalign
1604 Dictionary with target chain names as key and model chain names as
1605 values. No guarantee that all chains are mapped. USalign executable
1606 can be specified with usalign_exec kwarg at Scorer construction, an
1607 OpenStructure internal copy of the USalign code is used otherwise.
1609 :type: :class:`dict`
1615 def _aln_helper(self, target, model):
1620 for ch
in target.chains:
1621 cname = ch.GetName()
1622 s =
''.join([r.one_letter_code
for r
in ch.residues])
1623 s = seq.CreateSequence(ch.GetName(), s)
1624 s.AttachView(target.Select(f
"cname={mol.QueryQuoteName(cname)}"))
1625 trg_seqs[ch.GetName()] = s
1627 for ch
in model.chains:
1628 cname = ch.GetName()
1629 s =
''.join([r.one_letter_code
for r
in ch.residues])
1630 s = seq.CreateSequence(cname, s)
1631 s.AttachView(model.Select(f
"cname={mol.QueryQuoteName(cname)}"))
1632 mdl_seqs[ch.GetName()] = s
1635 trg_pep_chains = [s.GetName()
for s
in self.
chain_mapperchain_mapper.polypep_seqs]
1636 trg_nuc_chains = [s.GetName()
for s
in self.
chain_mapperchain_mapper.polynuc_seqs]
1637 trg_pep_chains = set(trg_pep_chains)
1638 trg_nuc_chains = set(trg_nuc_chains)
1639 for trg_ch, mdl_ch
in self.
mappingmapping.GetFlatMapping().items():
1640 if mdl_ch
in mdl_seqs
and trg_ch
in trg_seqs:
1641 if trg_ch
in trg_pep_chains:
1642 stype = mol.ChemType.AMINOACIDS
1643 elif trg_ch
in trg_nuc_chains:
1644 stype = mol.ChemType.NUCLEOTIDES
1646 raise RuntimeError(
"Chain name inconsistency... ask "
1648 alns.append(self.
chain_mapperchain_mapper.Align(trg_seqs[trg_ch],
1651 alns[-1].AttachView(0, trg_seqs[trg_ch].GetAttachedView())
1652 alns[-1].AttachView(1, mdl_seqs[mdl_ch].GetAttachedView())
1655 def _compute_pepnuc_aln(self):
1656 query =
"peptide=true or nucleotide=true"
1657 pep_nuc_target = self.
target_origtarget_orig.Select(query)
1658 pep_nuc_model = self.
model_origmodel_orig.Select(query)
1661 def _compute_aln(self):
1664 def _compute_stereochecked_aln(self):
1668 def _compute_lddt(self):
1669 LogScript(
"Computing all-atom lDDT")
1671 flat_mapping = self.
mappingmapping.GetFlatMapping(mdl_as_key=
True)
1674 stereochecked_alns = dict()
1676 mdl_seq = aln.GetSequence(1)
1677 stereochecked_alns[mdl_seq.name] = aln
1679 for aln
in self.
alnaln:
1680 mdl_seq = aln.GetSequence(1)
1681 alns[mdl_seq.name] = aln
1688 lddt_chain_mapping = dict()
1689 for mdl_ch, trg_ch
in flat_mapping.items():
1691 lddt_chain_mapping[mdl_ch] = trg_ch
1693 chain_mapping = lddt_chain_mapping,
1694 residue_mapping = alns,
1695 check_resnames=
False,
1696 local_lddt_prop=
"lddt",
1699 for r
in self.
modelmodel.residues:
1700 cname = r.GetChain().GetName()
1701 if cname
not in local_lddt:
1702 local_lddt[cname] = dict()
1703 if r.HasProp(
"lddt"):
1704 score = round(r.GetFloatProp(
"lddt"), 3)
1705 local_lddt[cname][r.GetNumber()] = score
1709 local_lddt[cname][r.GetNumber()] =
None
1712 lddt_chain_mapping = dict()
1713 for mdl_ch, trg_ch
in flat_mapping.items():
1714 if mdl_ch
in stereochecked_alns:
1715 lddt_chain_mapping[mdl_ch] = trg_ch
1717 chain_mapping = lddt_chain_mapping,
1718 residue_mapping = stereochecked_alns,
1719 check_resnames=
False,
1720 local_lddt_prop=
"lddt",
1723 for r
in self.
modelmodel.residues:
1724 cname = r.GetChain().GetName()
1725 if cname
not in local_lddt:
1726 local_lddt[cname] = dict()
1727 if r.HasProp(
"lddt"):
1728 score = round(r.GetFloatProp(
"lddt"), 3)
1729 local_lddt[cname][r.GetNumber()] = score
1733 if mdl_res.IsValid():
1736 local_lddt[cname][r.GetNumber()] =
None
1743 if cname
in flat_mapping:
1744 for col
in alns[cname]:
1745 if col[0] !=
'-' and col[1] !=
'-':
1746 if col.GetResidue(1).number == r.number:
1747 trg_r = col.GetResidue(0)
1750 local_lddt[cname][r.GetNumber()] =
None
1752 local_lddt[cname][r.GetNumber()] = 0.0
1754 self.
_lddt_lddt = lddt_score
1757 def _compute_bb_lddt(self):
1758 LogScript(
"Computing backbone lDDT")
1761 for aln
in self.
alnaln:
1762 mdl_seq = aln.GetSequence(1)
1763 alns[mdl_seq.name] = aln
1766 flat_mapping = self.
mappingmapping.GetFlatMapping(mdl_as_key=
True)
1767 lddt_chain_mapping = dict()
1768 for mdl_ch, trg_ch
in flat_mapping.items():
1770 lddt_chain_mapping[mdl_ch] = trg_ch
1773 chain_mapping = lddt_chain_mapping,
1774 residue_mapping = alns,
1775 check_resnames=
False,
1776 local_lddt_prop=
"bb_lddt",
1779 for r
in self.
modelmodel.residues:
1780 cname = r.GetChain().GetName()
1781 if cname
not in local_lddt:
1782 local_lddt[cname] = dict()
1783 if r.HasProp(
"bb_lddt"):
1784 score = round(r.GetFloatProp(
"bb_lddt"), 3)
1785 local_lddt[cname][r.GetNumber()] = score
1789 local_lddt[cname][r.GetNumber()] =
None
1794 def _compute_ilddt(self):
1795 LogScript(
"Computing all-atom ilDDT")
1797 flat_mapping = self.
mappingmapping.GetFlatMapping(mdl_as_key=
True)
1801 for aln
in self.
alnaln:
1802 mdl_seq = aln.GetSequence(1)
1803 alns[mdl_seq.name] = aln
1804 lddt_chain_mapping = dict()
1805 for mdl_ch, trg_ch
in flat_mapping.items():
1807 lddt_chain_mapping[mdl_ch] = trg_ch
1809 chain_mapping = lddt_chain_mapping,
1810 residue_mapping = alns,
1811 check_resnames=
False,
1812 local_lddt_prop=
"lddt",
1814 no_intrachain=
True)[0]
1818 mdl_seq = aln.GetSequence(1)
1819 alns[mdl_seq.name] = aln
1820 lddt_chain_mapping = dict()
1821 for mdl_ch, trg_ch
in flat_mapping.items():
1823 lddt_chain_mapping[mdl_ch] = trg_ch
1825 chain_mapping = lddt_chain_mapping,
1826 residue_mapping = alns,
1827 check_resnames=
False,
1828 local_lddt_prop=
"lddt",
1830 no_intrachain=
True)[0]
1833 def _compute_qs(self):
1834 LogScript(
"Computing global QS-score")
1835 qs_score_result = self.
qs_scorerqs_scorer.Score(self.
mappingmapping.mapping)
1836 self.
_qs_global_qs_global = qs_score_result.QS_global
1837 self.
_qs_best_qs_best = qs_score_result.QS_best
1839 def _compute_per_interface_qs_scores(self):
1840 LogScript(
"Computing per-interface QS-score")
1845 trg_ch1 = interface[0]
1846 trg_ch2 = interface[1]
1847 mdl_ch1 = interface[2]
1848 mdl_ch2 = interface[3]
1849 qs_res = self.
qs_scorerqs_scorer.ScoreInterface(trg_ch1, trg_ch2,
1854 def _compute_ics_scores(self):
1855 LogScript(
"Computing ICS scores")
1858 self.
_ics_recall_ics_recall = contact_scorer_res.recall
1859 self.
_ics_ics = contact_scorer_res.ics
1863 flat_mapping = self.
mappingmapping.GetFlatMapping()
1865 trg_ch1 = trg_int[0]
1866 trg_ch2 = trg_int[1]
1867 if trg_ch1
in flat_mapping
and trg_ch2
in flat_mapping:
1868 mdl_ch1 = flat_mapping[trg_ch1]
1869 mdl_ch2 = flat_mapping[trg_ch2]
1870 res = self.
contact_scorercontact_scorer.ScoreICSInterface(trg_ch1, trg_ch2,
1880 def _compute_ips_scores(self):
1881 LogScript(
"Computing IPS scores")
1884 self.
_ips_recall_ips_recall = contact_scorer_res.recall
1885 self.
_ips_ips = contact_scorer_res.ips
1890 flat_mapping = self.
mappingmapping.GetFlatMapping()
1892 trg_ch1 = trg_int[0]
1893 trg_ch2 = trg_int[1]
1894 if trg_ch1
in flat_mapping
and trg_ch2
in flat_mapping:
1895 mdl_ch1 = flat_mapping[trg_ch1]
1896 mdl_ch2 = flat_mapping[trg_ch2]
1897 res = self.
contact_scorercontact_scorer.ScoreIPSInterface(trg_ch1, trg_ch2,
1907 def _compute_dockq_scores(self):
1908 LogScript(
"Computing DockQ")
1911 self.
_fnat_fnat = list()
1913 self.
_irmsd_irmsd = list()
1914 self.
_lrmsd_lrmsd = list()
1915 self.
_nnat_nnat = list()
1916 self.
_nmdl_nmdl = list()
1919 for aln
in self.
alnaln:
1920 trg_s = aln.GetSequence(0)
1921 mdl_s = aln.GetSequence(1)
1922 dockq_alns[(trg_s.GetName(), mdl_s.GetName())] = aln
1925 trg_ch1 = interface[0]
1926 trg_ch2 = interface[1]
1927 mdl_ch1 = interface[2]
1928 mdl_ch2 = interface[3]
1929 aln1 = dockq_alns[(trg_ch1, mdl_ch1)]
1930 aln2 = dockq_alns[(trg_ch2, mdl_ch2)]
1932 res = dockq.DockQ(self.
modelmodel, self.
targettarget, mdl_ch1, mdl_ch2,
1933 trg_ch1, trg_ch2, ch1_aln=aln1,
1934 ch2_aln=aln2, contact_dist_thresh = 4.0,
1935 interface_dist_thresh=8.0,
1936 interface_cb =
True)
1938 res = dockq.DockQ(self.
modelmodel, self.
targettarget, mdl_ch1, mdl_ch2,
1939 trg_ch1, trg_ch2, ch1_aln=aln1,
1942 self.
_fnat_fnat.append(res[
"fnat"])
1943 self.
_fnonnat_fnonnat.append(res[
"fnonnat"])
1944 self.
_irmsd_irmsd.append(res[
"irmsd"])
1945 self.
_lrmsd_lrmsd.append(res[
"lrmsd"])
1947 self.
_nnat_nnat.append(res[
"nnat"])
1948 self.
_nmdl_nmdl.append(res[
"nmdl"])
1953 not_covered_counts = list()
1954 proc_trg_interfaces = set([(x[0], x[1])
for x
in self.
dockq_interfacesdockq_interfaces])
1956 if interface
not in proc_trg_interfaces:
1960 trg_ch1 = interface[0]
1961 trg_ch2 = interface[1]
1964 res = dockq.DockQ(self.
targettarget, self.
targettarget,
1965 trg_ch1, trg_ch2, trg_ch1, trg_ch2,
1966 contact_dist_thresh = 4.0,
1967 interface_dist_thresh=8.0,
1968 interface_cb =
True)
1970 res = dockq.DockQ(self.
targettarget, self.
targettarget,
1971 trg_ch1, trg_ch2, trg_ch1, trg_ch2)
1973 not_covered_counts.append(res[
"nnat"])
1980 weights = np.array(self.
_nnat_nnat)
1985 self.
_dockq_wave_dockq_wave = np.sum(np.multiply(weights/np.sum(weights), scores))
1986 scores = np.append(scores, [0.0]*len(not_covered_counts))
1987 weights = np.append(weights, not_covered_counts)
1992 self.
_dockq_wave_full_dockq_wave_full = np.sum(np.multiply(weights/np.sum(weights),
1995 def _extract_mapped_pos(self):
1999 processed_trg_chains = set()
2000 for trg_ch, mdl_ch
in self.
mappingmapping.GetFlatMapping().items():
2001 processed_trg_chains.add(trg_ch)
2002 aln = self.
mappingmapping.alns[(trg_ch, mdl_ch)]
2004 if col[0] !=
'-' and col[1] !=
'-':
2005 trg_res = col.GetResidue(0)
2006 mdl_res = col.GetResidue(1)
2007 trg_at = trg_res.FindAtom(
"CA")
2008 mdl_at = mdl_res.FindAtom(
"CA")
2009 if not trg_at.IsValid():
2010 trg_at = trg_res.FindAtom(
"C3'")
2011 if not mdl_at.IsValid():
2012 mdl_at = mdl_res.FindAtom(
"C3'")
2018 for ch
in self.
mappingmapping.target.chains:
2019 if ch.GetName()
not in processed_trg_chains:
2023 def _extract_rigid_mapped_pos(self):
2027 processed_trg_chains = set()
2028 for trg_ch, mdl_ch
in self.
rigid_mappingrigid_mapping.GetFlatMapping().items():
2029 processed_trg_chains.add(trg_ch)
2030 aln = self.
rigid_mappingrigid_mapping.alns[(trg_ch, mdl_ch)]
2032 if col[0] !=
'-' and col[1] !=
'-':
2033 trg_res = col.GetResidue(0)
2034 mdl_res = col.GetResidue(1)
2035 trg_at = trg_res.FindAtom(
"CA")
2036 mdl_at = mdl_res.FindAtom(
"CA")
2037 if not trg_at.IsValid():
2038 trg_at = trg_res.FindAtom(
"C3'")
2039 if not mdl_at.IsValid():
2040 mdl_at = mdl_res.FindAtom(
"C3'")
2047 if ch.GetName()
not in processed_trg_chains:
2051 def _compute_cad_score(self):
2053 raise RuntimeError(
"CAD score computations rely on residue numbers "
2054 "that are consistent between target and model "
2055 "chains, i.e. only work if resnum_alignments "
2056 "is True at Scorer construction.")
2058 LogScript(
"Computing CAD score")
2060 settings.Locate(
"voronota-cadscore",
2062 except Exception
as e:
2063 raise RuntimeError(
"voronota-cadscore must be in PATH for CAD "
2064 "score scoring")
from e
2065 cad_bin_dir = os.path.dirname(cad_score_exec)
2066 m = self.
mappingmapping.GetFlatMapping(mdl_as_key=
True)
2067 cad_result = cadscore.CADScore(self.
modelmodel, self.
targettarget,
2071 cad_bin_path=cad_bin_dir,
2075 for r
in self.
modelmodel.residues:
2076 cname = r.GetChain().GetName()
2077 if cname
not in local_cad:
2078 local_cad[cname] = dict()
2079 if r.HasProp(
"localcad"):
2080 score = round(r.GetFloatProp(
"localcad"), 3)
2081 local_cad[cname][r.GetNumber()] = score
2083 local_cad[cname][r.GetNumber()] =
None
2085 self.
_cad_score_cad_score = cad_result.globalAA
2088 def _get_repr_view(self, ent):
2089 """ Returns view with representative peptide atoms => CB, CA for GLY
2091 Ensures that each residue has exactly one atom with assertions
2093 :param ent: Entity for which you want the representative view
2094 :param ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
2095 :returns: :class:`ost.mol.EntityView` derived from ent
2097 repr_ent = ent.Select(
"(aname=\"CB\" or (rname=\"GLY\" and aname=\"CA\"))")
2098 for r
in repr_ent.residues:
2099 assert(len(r.atoms) == 1)
2102 def _get_interface_residues(self, ent):
2103 """ Get interface residues
2105 Thats all residues having a contact with at least one residue from another
2106 chain (CB-CB distance <= 8A, CA in case of Glycine)
2108 :param ent: Model for which to extract interface residues
2109 :type ent: :class:`ost.mol.EntityView`
2110 :returns: :class:`dict` with chain names as key and and :class:`list`
2111 with residue numbers of the respective interface residues.
2115 result = {ch.GetName(): list()
for ch
in ent.chains}
2116 for ch
in ent.chains:
2117 cname = ch.GetName()
2118 sel = repr_ent.Select(f
"(cname={mol.QueryQuoteName(cname)} and 8 <> [cname!={mol.QueryQuoteName(cname)}])")
2119 result[cname] = [r.GetNumber()
for r
in sel.residues]
2122 def _do_stereochecks(self):
2123 """ Perform stereochemistry checks on model and target
2125 LogInfo(
"Performing stereochemistry checks on model and target")
2126 data = stereochemistry.GetDefaultStereoData()
2127 l_data = stereochemistry.GetDefaultStereoLinkData()
2129 a, b, c, d = stereochemistry.StereoCheck(self.
modelmodel, stereo_data = data,
2130 stereo_link_data = l_data)
2136 a, b, c, d = stereochemistry.StereoCheck(self.
targettarget, stereo_data = data,
2137 stereo_link_data = l_data)
2144 def _get_interface_patches(self, mdl_ch, mdl_rnum):
2145 """ Select interface patches representative for specified residue
2147 The patches for specified residue r in chain c are selected as follows:
2149 * mdl_patch_one: All residues in c with CB (CA for GLY) positions within 8A
2150 of r and within 12A of residues from any other chain.
2151 * mdl_patch_two: Closest residue x to r in any other chain gets identified.
2152 Patch is then constructed by selecting all residues from any other chain
2153 within 8A of x and within 12A from any residue in c.
2154 * trg_patch_one: Chain name and residue number based mapping from
2156 * trg_patch_two: Chain name and residue number based mapping from
2159 :param mdl_ch: Name of chain in *self.model* of residue of interest
2160 :type mdl_ch: :class:`str`
2161 :param mdl_rnum: Residue number of residue of interest
2162 :type mdl_rnum: :class:`ost.mol.ResNum`
2163 :returns: Tuple with 5 elements: 1) :class:`bool` flag whether all residues
2164 in *mdl* patches are covered in *trg* 2) mtl_patch_one
2165 3) mdl_patch_two 4) trg_patch_one 5) trg_patch_two
2171 r = self.
modelmodel.FindResidue(mdl_ch, mdl_rnum)
2173 raise RuntimeError(f
"Cannot find residue {mdl_rnum} in chain {mdl_ch}")
2174 if r.GetName() ==
"GLY":
2175 at = r.FindAtom(
"CA")
2177 at = r.FindAtom(
"CB")
2178 if not at.IsValid():
2179 raise RuntimeError(
"Cannot find interface views for res without CB/CA")
2186 q1 = f
"(cname={mol.QueryQuoteName(mdl_ch)} and 8 <> {{{r_pos[0]},{r_pos[1]},{r_pos[2]}}})"
2188 q2 = f
"(12 <> [cname!={mol.QueryQuoteName(mdl_ch)}])"
2189 mdl_patch_one = self.
modelmodel.CreateEmptyView()
2190 sel = repr_mdl.Select(
" and ".join([q1, q2]))
2191 for r
in sel.residues:
2192 mdl_r = self.
modelmodel.FindResidue(r.GetChain().GetName(), r.GetNumber())
2193 mdl_patch_one.AddResidue(mdl_r, mol.ViewAddFlag.INCLUDE_ALL)
2199 sel = repr_mdl.Select(f
"(cname!={mol.QueryQuoteName(mdl_ch)})")
2200 close_stuff = sel.FindWithin(r_pos, 8)
2203 for close_at
in close_stuff:
2206 min_pos = close_at.GetPos()
2210 q1 = f
"(cname!={mol.QueryQuoteName(mdl_ch)} and 8 <> {{{min_pos[0]},{min_pos[1]},{min_pos[2]}}})"
2212 q2 = f
"(12 <> [cname={mol.QueryQuoteName(mdl_ch)}])"
2213 mdl_patch_two = self.
modelmodel.CreateEmptyView()
2214 sel = repr_mdl.Select(
" and ".join([q1, q2]))
2215 for r
in sel.residues:
2216 mdl_r = self.
modelmodel.FindResidue(r.GetChain().GetName(), r.GetNumber())
2217 mdl_patch_two.AddResidue(mdl_r, mol.ViewAddFlag.INCLUDE_ALL)
2220 flat_mapping = self.
mappingmapping.GetFlatMapping(mdl_as_key=
True)
2221 full_trg_coverage =
True
2222 trg_patch_one = self.
targettarget.CreateEmptyView()
2223 for r
in mdl_patch_one.residues:
2225 mdl_cname = r.GetChain().GetName()
2226 if mdl_cname
in flat_mapping:
2227 aln = self.
mappingmapping.alns[(flat_mapping[mdl_cname], mdl_cname)]
2229 if col[0] !=
'-' and col[1] !=
'-':
2230 if col.GetResidue(1).GetNumber() == r.GetNumber():
2231 trg_r = col.GetResidue(0)
2233 if trg_r
is not None:
2234 trg_patch_one.AddResidue(trg_r.handle,
2235 mol.ViewAddFlag.INCLUDE_ALL)
2237 full_trg_coverage =
False
2239 trg_patch_two = self.
targettarget.CreateEmptyView()
2240 for r
in mdl_patch_two.residues:
2242 mdl_cname = r.GetChain().GetName()
2243 if mdl_cname
in flat_mapping:
2244 aln = self.
mappingmapping.alns[(flat_mapping[mdl_cname], mdl_cname)]
2246 if col[0] !=
'-' and col[1] !=
'-':
2247 if col.GetResidue(1).GetNumber() == r.GetNumber():
2248 trg_r = col.GetResidue(0)
2250 if trg_r
is not None:
2251 trg_patch_two.AddResidue(trg_r.handle,
2252 mol.ViewAddFlag.INCLUDE_ALL)
2254 full_trg_coverage =
False
2256 return (full_trg_coverage, mdl_patch_one, mdl_patch_two,
2257 trg_patch_one, trg_patch_two)
2259 def _compute_patchqs_scores(self):
2260 LogScript(
"Computing patch QS-scores")
2266 r = self.
modelmodel.FindResidue(cname, rnum)
2267 if r.IsValid()
and r.GetChemType() == mol.ChemType.AMINOACIDS:
2268 full_trg_coverage, mdl_patch_one, mdl_patch_two, \
2269 trg_patch_one, trg_patch_two = \
2271 if full_trg_coverage:
2272 score = self.
_patchqs_patchqs(mdl_patch_one, mdl_patch_two,
2273 trg_patch_one, trg_patch_two)
2274 scores.append(score)
2277 def _compute_patchdockq_scores(self):
2278 LogScript(
"Computing patch DockQ scores")
2284 r = self.
modelmodel.FindResidue(cname, rnum)
2285 if r.IsValid()
and r.GetChemType() == mol.ChemType.AMINOACIDS:
2286 full_trg_coverage, mdl_patch_one, mdl_patch_two, \
2287 trg_patch_one, trg_patch_two = \
2289 if full_trg_coverage:
2290 score = self.
_patchdockq_patchdockq(mdl_patch_one, mdl_patch_two,
2291 trg_patch_one, trg_patch_two)
2292 scores.append(score)
2295 def _patchqs(self, mdl_patch_one, mdl_patch_two, trg_patch_one, trg_patch_two):
2296 """ Score interface residue patches with QS-score
2298 In detail: Construct two entities with two chains each. First chain
2299 consists of residues from <x>_patch_one and second chain consists of
2300 <x>_patch_two. The returned score is the QS-score between the two
2303 :param mdl_patch_one: Interface patch representing scored residue
2304 :type mdl_patch_one: :class:`ost.mol.EntityView`
2305 :param mdl_patch_two: Interface patch representing scored residue
2306 :type mdl_patch_two: :class:`ost.mol.EntityView`
2307 :param trg_patch_one: Interface patch representing scored residue
2308 :type trg_patch_one: :class:`ost.mol.EntityView`
2309 :param trg_patch_two: Interface patch representing scored residue
2310 :type trg_patch_two: :class:`ost.mol.EntityView`
2311 :returns: PatchQS score
2316 alnA = seq.CreateAlignment()
2317 s =
''.join([r.one_letter_code
for r
in mdl_patch_one.residues])
2318 alnA.AddSequence(seq.CreateSequence(
"A", s))
2319 s =
''.join([r.one_letter_code
for r
in trg_patch_one.residues])
2320 alnA.AddSequence(seq.CreateSequence(
"A", s))
2322 alnB = seq.CreateAlignment()
2323 s =
''.join([r.one_letter_code
for r
in mdl_patch_two.residues])
2324 alnB.AddSequence(seq.CreateSequence(
"B", s))
2325 s =
''.join([r.one_letter_code
for r
in trg_patch_two.residues])
2326 alnB.AddSequence(seq.CreateSequence(
"B", s))
2327 alns = {(
"A",
"A"): alnA, (
"B",
"B"): alnB}
2329 scorer =
QSScorer(qs_ent_mdl, [[
"A"], [
"B"]], qs_ent_trg, alns)
2330 score_result = scorer.Score([[
"A"], [
"B"]])
2332 return score_result.QS_global
2334 def _patchdockq(self, mdl_patch_one, mdl_patch_two, trg_patch_one,
2336 """ Score interface residue patches with DockQ
2338 In detail: Construct two entities with two chains each. First chain
2339 consists of residues from <x>_patch_one and second chain consists of
2340 <x>_patch_two. The returned score is the QS-score between the two
2343 :param mdl_patch_one: Interface patch representing scored residue
2344 :type mdl_patch_one: :class:`ost.mol.EntityView`
2345 :param mdl_patch_two: Interface patch representing scored residue
2346 :type mdl_patch_two: :class:`ost.mol.EntityView`
2347 :param trg_patch_one: Interface patch representing scored residue
2348 :type trg_patch_one: :class:`ost.mol.EntityView`
2349 :param trg_patch_two: Interface patch representing scored residue
2350 :type trg_patch_two: :class:`ost.mol.EntityView`
2351 :returns: DockQ score
2355 dockq_result = dockq.DockQ(t, m,
"A",
"B",
"A",
"B")
2356 if dockq_result[
"nnat"] > 0:
2357 return dockq_result[
"DockQ"]
2360 def _qs_ent_from_patches(self, patch_one, patch_two):
2361 """ Constructs Entity with two chains named "A" and "B""
2363 Blindly adds all residues from *patch_one* to chain A and residues from
2364 patch_two to chain B.
2366 ent = mol.CreateEntity()
2368 added_ch = ed.InsertChain(
"A")
2369 for r
in patch_one.residues:
2370 added_r = ed.AppendResidue(added_ch, r.GetName())
2371 added_r.SetChemClass(str(r.GetChemClass()))
2373 ed.InsertAtom(added_r, a.handle)
2374 added_ch = ed.InsertChain(
"B")
2375 for r
in patch_two.residues:
2376 added_r = ed.AppendResidue(added_ch, r.GetName())
2377 added_r.SetChemClass(str(r.GetChemClass()))
2379 ed.InsertAtom(added_r, a.handle)
2382 def _construct_custom_mapping(self, mapping):
2383 """ constructs a full blown MappingResult object from simple dict
2385 :param mapping: mapping with trg chains as key and mdl ch as values
2386 :type mapping: :class:`dict`
2388 LogInfo(
"Setting custom chain mapping")
2391 chem_mapping, chem_group_alns, mdl = \
2392 chain_mapper.GetChemMapping(self.
modelmodel)
2397 if len(mapping) != len(set(mapping.keys())):
2398 raise RuntimeError(f
"Expect unique trg chain names in mapping. Got "
2399 f
"{mapping.keys()}")
2400 if len(mapping) != len(set(mapping.values())):
2401 raise RuntimeError(f
"Expect unique mdl chain names in mapping. Got "
2402 f
"{mapping.values()}")
2404 trg_chains = set([ch.GetName()
for ch
in chain_mapper.target.chains])
2405 mdl_chains = set([ch.GetName()
for ch
in mdl.chains])
2406 for k,v
in mapping.items():
2407 if k
not in trg_chains:
2408 raise RuntimeError(f
"Target chain \"{k}\" is not available "
2409 f
"in target processed for chain mapping "
2411 if v
not in mdl_chains:
2412 raise RuntimeError(f
"Model chain \"{v}\" is not available "
2413 f
"in model processed for chain mapping "
2416 for trg_ch, mdl_ch
in mapping.items():
2417 trg_group_idx =
None
2418 mdl_group_idx =
None
2419 for idx, group
in enumerate(chain_mapper.chem_groups):
2423 for idx, group
in enumerate(chem_mapping):
2427 if trg_group_idx
is None or mdl_group_idx
is None:
2428 raise RuntimeError(
"Could not establish a valid chem grouping "
2429 "of chain names provided in custom mapping.")
2431 if trg_group_idx != mdl_group_idx:
2432 raise RuntimeError(f
"Chem group mismatch in custom mapping: "
2433 f
"target chain \"{trg_ch}\" groups with the "
2434 f
"following chemically equivalent target "
2436 f
"{chain_mapper.chem_groups[trg_group_idx]} "
2437 f
"but model chain \"{mdl_ch}\" maps to the "
2438 f
"following target chains: "
2439 f
"{chain_mapper.chem_groups[mdl_group_idx]}")
2441 pairs = set([(trg_ch, mdl_ch)
for trg_ch, mdl_ch
in mapping.items()])
2443 chain_mapping._GetRefMdlAlns(chain_mapper.chem_groups,
2444 chain_mapper.chem_group_alignments,
2450 final_mapping = list()
2451 for ref_chains
in chain_mapper.chem_groups:
2452 mapped_mdl_chains = list()
2453 for ref_ch
in ref_chains:
2454 if ref_ch
in mapping:
2455 mapped_mdl_chains.append(mapping[ref_ch])
2457 mapped_mdl_chains.append(
None)
2458 final_mapping.append(mapped_mdl_chains)
2461 for ref_group, mdl_group
in zip(chain_mapper.chem_groups,
2463 for ref_ch, mdl_ch
in zip(ref_group, mdl_group):
2464 if ref_ch
is not None and mdl_ch
is not None:
2465 aln = ref_mdl_alns[(ref_ch, mdl_ch)]
2466 trg_view = chain_mapper.target.Select(f
"cname={mol.QueryQuoteName(ref_ch)}")
2467 mdl_view = mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch)}")
2468 aln.AttachView(0, trg_view)
2469 aln.AttachView(1, mdl_view)
2470 alns[(ref_ch, mdl_ch)] = aln
2473 chain_mapper.chem_groups,
2475 final_mapping, alns)
2477 def _compute_tmscore(self):
2480 LogScript(
"Computing patch TM-score with USalign exectuable")
2482 flat_mapping = self.
rigid_mappingrigid_mapping.GetFlatMapping()
2483 LogInfo(
"Overriding TM-score chain mapping")
2484 res = res = bindings.WrappedMMAlign(self.
modelmodel, self.
targettarget,
2485 mapping=flat_mapping)
2487 res = bindings.WrappedMMAlign(self.
modelmodel, self.
targettarget)
2489 LogScript(
"Computing patch TM-score with built-in USalign")
2491 LogInfo(
"Overriding TM-score chain mapping")
2492 flat_mapping = self.
rigid_mappingrigid_mapping.GetFlatMapping()
2493 res = tmtools.USAlign(self.
modelmodel, self.
targettarget,
2495 custom_chain_mapping = flat_mapping)
2497 res = tmtools.USAlign(self.
modelmodel, self.
targettarget,
2502 for a,b
in zip(res.ent1_mapped_chains, res.ent2_mapped_chains):
2506 __all__ = (
'lDDTBSScorer',
'Scorer',)
def target_interface_residues(self)
def _patchqs(self, mdl_patch_one, mdl_patch_two, trg_patch_one, trg_patch_two)
_per_interface_ips_recall
def per_interface_ics(self)
def target_bad_angles(self)
def _get_repr_view(self, ent)
def qs_target_interfaces(self)
def rigid_mapped_model_pos(self)
def _do_stereochecks(self)
def _compute_per_interface_qs_scores(self)
def model_bad_angles(self)
_per_interface_ics_recall
def __init__(self, model, target, resnum_alignments=False, molck_settings=None, cad_score_exec=None, custom_mapping=None, custom_rigid_mapping=None, usalign_exec=None, lddt_no_stereochecks=False, n_max_naive=40320, oum=False, min_pep_length=6, min_nuc_length=4, lddt_add_mdl_contacts=False, dockq_capri_peptide=False)
_per_interface_ips_precision
def model_interface_residues(self)
_contact_target_interfaces
def stereochecked_target(self)
def per_interface_ips_precision(self)
def dockq_wave_full(self)
def mapped_model_pos(self)
def _compute_dockq_scores(self)
def _aln_helper(self, target, model)
def model_bad_bonds(self)
def per_interface_ics_recall(self)
def _compute_bb_lddt(self)
def transformed_mapped_model_pos(self)
def usalign_mapping(self)
def native_contacts(self)
def local_cad_score(self)
def _compute_patchqs_scores(self)
def per_interface_ips(self)
def _compute_stereochecked_aln(self)
_rigid_transformed_mapped_model_pos
def rigid_transformed_mapped_model_pos(self)
def n_target_not_mapped(self)
def contact_model_interfaces(self)
_transformed_mapped_model_pos
def _patchdockq(self, mdl_patch_one, mdl_patch_two, trg_patch_one, trg_patch_two)
def dockq_interfaces(self)
def per_interface_ips_recall(self)
def per_interface_ics_precision(self)
def _compute_ics_scores(self)
_rigid_n_target_not_mapped
def _get_interface_residues(self, ent)
def rigid_transform(self)
def _compute_ips_scores(self)
def _extract_mapped_pos(self)
def rigid_mapped_target_pos(self)
def _compute_tmscore(self)
def dockq_target_interfaces(self)
def stereochecked_aln(self)
_per_interface_ics_precision
def _compute_patchdockq_scores(self)
def _get_interface_patches(self, mdl_ch, mdl_rnum)
def _extract_rigid_mapped_pos(self)
_contact_model_interfaces
_target_interface_residues
def contact_target_interfaces(self)
def per_interface_qs_global(self)
def _qs_ent_from_patches(self, patch_one, patch_two)
def stereochecked_model(self)
_model_interface_residues
def _compute_pepnuc_aln(self)
def per_interface_qs_best(self)
def rigid_n_target_not_mapped(self)
def mapped_target_pos(self)
def target_bad_bonds(self)
def qs_model_interfaces(self)
def _compute_cad_score(self)
def _construct_custom_mapping(self, mapping)
def ScoreBS(self, ligand, radius=4.0, lddt_radius=10.0)
def __init__(self, reference, model, residue_number_alignment=False)
Real DLLEXPORT_OST_GEOM Distance(const Line2 &l, const Vec2 &v)
void Molck(ost::mol::EntityHandle &ent, ost::conop::CompoundLibPtr lib, const MolckSettings &settings, bool prune=true)
void GDT(const geom::Vec3List &mdl_pos, const geom::Vec3List &ref_pos, int window_size, int max_windows, Real distance_thresh, int &n_superposed, geom::Mat4 &transform)