6 from ost
import settings
8 from ost
import LogScript, LogInfo, LogDebug
20 from ost
import bindings
25 def _GetAlignedResidues(aln, s1_ent, s2_ent):
26 """ Yields aligned residues
28 :param aln: The alignment with 2 sequences defining a residue-by-residue
30 :type aln: :class:`ost.seq.AlignmentHandle`
31 :param s1_ent: Structure representing first sequence in *aln*.
32 One chain must be named after the first sequence and the
33 number of residues must match the number of non-gap
35 :type s1_ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
36 :param s2_ent: Same for second sequence in *aln*.
37 :type s2_ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
39 s1_ch = s1_ent.FindChain(aln.GetSequence(0).GetName())
40 s2_ch = s2_ent.FindChain(aln.GetSequence(1).GetName())
42 if not s1_ch.IsValid():
43 raise RuntimeError(
"s1_ent lacks required chain in _GetAlignedResidues")
45 if not s2_ch.IsValid():
46 raise RuntimeError(
"s2_ent lacks required chain in _GetAlignedResidues")
48 if len(aln.GetSequence(0).GetGaplessString()) != s1_ch.GetResidueCount():
49 raise RuntimeError(
"aln/chain mismatch in _GetAlignedResidues")
50 if len(aln.GetSequence(1).GetGaplessString()) != s2_ch.GetResidueCount():
51 raise RuntimeError(
"aln/chain mismatch in _GetAlignedResidues")
53 s1_res = s1_ch.residues
54 s2_res = s2_ch.residues
60 if col[0] !=
'-' and col[1] !=
'-':
61 yield (s1_res[s1_res_idx], s2_res[s2_res_idx])
68 """Scorer specific for a reference/model pair
70 Finds best possible binding site representation of reference in model given
71 LDDT score. Uses :class:`ost.mol.alg.chain_mapping.ChainMapper` to deal with
74 :param reference: Reference structure
75 :type reference: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
76 :param model: Model structure
77 :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
78 :param residue_number_alignment: Passed to ChainMapper constructor
79 :type residue_number_alignment: :class:`bool`
82 residue_number_alignment=False):
84 resnum_alignments=residue_number_alignment)
88 def ScoreBS(self, ligand, radius = 4.0, lddt_radius=10.0):
89 """Computes binding site LDDT score given *ligand*. Best possible
90 binding site representation is selected by LDDT but other scores such as
91 CA based RMSD and GDT are computed too and returned.
93 :param ligand: Defines the scored binding site, i.e. provides positions
94 to perform proximity search
95 :type ligand: r'((Residue)|(Chain)|(Entity))((View)|(Handle))'
96 :param radius: Reference residues with any atom position within *radius*
97 of *ligand* consitute the scored binding site
98 :type radius: :class:`float`
99 :param lddt_radius: Passed as *inclusion_radius* to
100 :class:`ost.mol.alg.lddt.lDDTScorer`
101 :type lddt_radius: :class:`float`
102 :returns: Object of type :class:`ost.mol.alg.chain_mapping.ReprResult`
103 containing all atom LDDT score and mapping information.
104 None if no representation could be found.
108 ref_residues_hashes = set()
109 for ligand_at
in ligand.atoms:
110 close_atoms = self.
refref.FindWithin(ligand_at.GetPos(), radius)
111 for close_at
in close_atoms:
112 ref_res = close_at.GetResidue()
113 h = ref_res.handle.GetHashCode()
114 if h
not in ref_residues_hashes:
115 ref_residues_hashes.add(h)
120 ref_bs = self.
refref.CreateEmptyView()
121 for ch
in self.
refref.chains:
122 for r
in ch.residues:
123 if r.handle.GetHashCode()
in ref_residues_hashes:
124 ref_bs.AddResidue(r, mol.ViewAddFlag.INCLUDE_ALL)
128 inclusion_radius = lddt_radius)
129 if len(bs_repr) >= 1:
136 """ Helper class to access the various scores available from ost.mol.alg
138 Deals with structure cleanup, chain mapping, interface identification etc.
139 Intermediate results are available as attributes.
141 :param model: Model structure - a deep copy is available as :attr:`~model`.
142 Additionally, :func:`ost.mol.alg.Molck` using *molck_settings*
144 :type model: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
145 :param target: Target structure - a deep copy is available as :attr:`~target`.
146 Additionally, :func:`ost.mol.alg.Molck` using *molck_settings*
148 :type target: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
149 :param resnum_alignments: Whether alignments between chemically equivalent
150 chains in *model* and *target* can be computed
151 based on residue numbers. This can be assumed in
152 benchmarking setups such as CAMEO/CASP.
153 :type resnum_alignments: :class:`bool`
154 :param molck_settings: Settings used for Molck on *model* and *target*, if
155 set to None, a default object is constructed by
156 setting everything except rm_zero_occ_atoms and
158 :class:`ost.mol.alg.MolckSettings` constructor.
159 :type molck_settings: :class:`ost.mol.alg.MolckSettings`
160 :param cad_score_exec: Explicit path to voronota-cadscore executable from
161 voronota installation from
162 https://github.com/kliment-olechnovic/voronota. If
163 not given, voronota-cadscore must be in PATH if any
164 of the CAD score related attributes is requested.
165 :type cad_score_exec: :class:`str`
166 :param custom_mapping: Provide custom chain mapping between *model* and
167 *target*. Dictionary with target chain names as key
168 and model chain names as value.
169 :attr:`~mapping` is constructed from this.
170 :type custom_mapping: :class:`dict`
171 :param custom_rigid_mapping: Provide custom chain mapping between *model*
172 and *target*. Dictionary with target chain
173 names as key and model chain names as value.
174 :attr:`~rigid_mapping` is constructed from
176 :type custom_rigid_mapping: :class:`dict`
177 :param usalign_exec: Explicit path to USalign executable used to compute
178 TM-score. If not given, TM-score will be computed
179 with OpenStructure internal copy of USalign code.
180 :type usalign_exec: :class:`str`
181 :param lddt_no_stereochecks: Whether to compute LDDT without stereochemistry
183 :type lddt_no_stereochecks: :class:`bool`
184 :param n_max_naive: Parameter for chain mapping. If the number of possible
185 mappings is <= *n_max_naive*, the full
186 mapping solution space is enumerated to find the
187 the optimum. A heuristic is used otherwise. The default
188 of 40320 corresponds to an octamer (8! = 40320).
189 A structure with stoichiometry A6B2 would be
191 :type n_max_naive: :class:`int`
192 :param oum: Override USalign Mapping. Inject rigid_mapping of
193 :class:`Scorer` object into USalign to compute TM-score.
194 Experimental feature with limitations.
195 :type oum: :class:`bool`
196 :param min_pep_length: Relevant parameter if short peptides are involved in
197 scoring. Minimum peptide length for a chain in the
198 target structure to be considered in chain mapping.
199 The chain mapping algorithm first performs an all vs.
200 all pairwise sequence alignment to identify \"equal\"
201 chains within the target structure. We go for simple
202 sequence identity there. Short sequences can be
203 problematic as they may produce high sequence identity
204 alignments by pure chance.
205 :type min_pep_length: :class:`int`
206 :param min_nuc_length: Relevant parameter if short nucleotides are involved
207 in scoring. Minimum nucleotide length for a chain in
208 the target structure to be considered in chain
209 mapping. The chain mapping algorithm first performs
210 an all vs. all pairwise sequence alignment to
211 identify \"equal\" chains within the target
212 structure. We go for simple sequence identity there.
213 Short sequences can be problematic as they may
214 produce high sequence identity alignments by pure
216 :type min_nuc_length: :class:`int`
217 :param lddt_add_mdl_contacts: LDDT specific flag. Only using contacts in
218 LDDT that are within a certain distance
219 threshold in the target does not penalize
220 for added model contacts. If set to True, this
221 flag will also consider target contacts
222 that are within the specified distance
223 threshold in the model but not necessarily in
224 the target. No contact will be added if the
225 respective atom pair is not resolved in the
227 :type lddt_add_mdl_contacts: :class:`bool`
228 :param dockq_capri_peptide: Flag that changes two things in the way DockQ
229 and its underlying scores are computed which is
230 proposed by the CAPRI community when scoring
231 peptides (PMID: 31886916).
232 ONE: Two residues are considered in contact if
233 any of their atoms is within 5A. This is
234 relevant for fnat and fnonat scores. CAPRI
235 suggests to lower this threshold to 4A for
236 protein-peptide interactions.
237 TWO: irmsd is computed on interface residues. A
238 residue is defined as interface residue if any
239 of its atoms is within 10A of another chain.
240 CAPRI suggests to lower the default of 10A to
241 8A in combination with only considering CB atoms
242 for protein-peptide interactions.
243 This flag has no influence on patch_dockq
245 :type dockq_capri_peptide: :class:`bool`
246 :param lddt_symmetry_settings: Passed as *symmetry_settings* parameter to
247 LDDT scorer. Default: None
248 :type lddt_symmetry_settings: :class:`ost.mol.alg.lddt.SymmetrySettings`
249 :param lddt_inclusion_radius: LDDT inclusion radius.
250 :param pep_seqid_thr: Parameter that affects identification of identical
251 chains in target - see
252 :class:`ost.mol.alg.chain_mapping.ChainMapper`
253 :type pep_seqid_thr: :class:`float`
254 :param nuc_seqid_thr: Parameter that affects identification of identical
255 chains in target - see
256 :class:`ost.mol.alg.chain_mapping.ChainMapper`
257 :type nuc_seqid_thr: :class:`float`
258 :param mdl_map_pep_seqid_thr: Parameter that affects mapping of model chains
259 to target chains - see
260 :class:`ost.mol.alg.chain_mapping.ChainMapper`
261 :type mdl_map_pep_seqid_thr: :class:`float`
262 :param mdl_map_nuc_seqid_thr: Parameter that affects mapping of model chains
263 to target chains - see
264 :class:`ost.mol.alg.chain_mapping.ChainMapper`
265 :type mdl_map_nuc_seqid_thr: :class:`float`
266 :param seqres: Parameter that affects identification of identical chains in
267 target - see :class:`ost.mol.alg.chain_mapping.ChainMapper`
268 :type seqres: :class:`ost.seq.SequenceList`
269 :param trg_seqres_mapping: Parameter that affects identification of identical
270 chains in target - see
271 :class:`ost.mol.alg.chain_mapping.ChainMapper`
272 :type trg_seqres_mapping: :class:`dict`
274 def __init__(self, model, target, resnum_alignments=False,
275 molck_settings = None, cad_score_exec = None,
276 custom_mapping=None, custom_rigid_mapping=None,
277 usalign_exec = None, lddt_no_stereochecks=False,
278 n_max_naive=40320, oum=False, min_pep_length = 6,
279 min_nuc_length = 4, lddt_add_mdl_contacts=False,
280 dockq_capri_peptide=False, lddt_symmetry_settings = None,
281 lddt_inclusion_radius = 15.0,
282 pep_seqid_thr = 95., nuc_seqid_thr = 95.,
283 mdl_map_pep_seqid_thr = 0.,
284 mdl_map_nuc_seqid_thr = 0.,
286 trg_seqres_mapping = None):
305 if molck_settings
is None:
310 rm_zero_occ_atoms=
False,
314 LogScript(
"Cleaning up input structures")
315 Molck(self.
_model_model, conop.GetDefaultLib(), molck_settings)
316 Molck(self.
_target_target, conop.GetDefaultLib(), molck_settings)
318 if resnum_alignments:
331 self.
_model_model = self.
_model_model.Select(
"peptide=True or nucleotide=True")
332 self.
_target_target = self.
_target_target.Select(
"peptide=True or nucleotide=True")
335 for ch
in self.
_model_model.chains:
336 if ch.GetName().strip() ==
"":
337 raise RuntimeError(
"Model chains must have valid chain names")
338 if ch.GetName().strip() ==
"'" or ch.GetName().strip() ==
'"':
339 raise RuntimeError(
"Model chains cannot be named with quote signs (' or \"\")")
342 for ch
in self.
_target_target.chains:
343 if ch.GetName().strip() ==
"":
344 raise RuntimeError(
"Target chains must have valid chain names")
345 if ch.GetName().strip() ==
"'" or ch.GetName().strip() ==
'"':
346 raise RuntimeError(
"Target chains cannot be named with quote signs (' or \"\")")
348 if resnum_alignments:
352 for ch
in self.
_model_model.chains:
353 ins_codes = [r.GetNumber().GetInsCode()
for r
in ch.residues]
354 if len(set(ins_codes)) != 1
or ins_codes[0] !=
'\0':
355 raise RuntimeError(
"Residue numbers in each model chain "
356 "must not contain insertion codes if "
357 "resnum_alignments are enabled")
358 nums = [r.GetNumber().GetNum()
for r
in ch.residues]
359 if not all(i < j
for i, j
in zip(nums, nums[1:])):
360 raise RuntimeError(
"Residue numbers in each model chain "
361 "must be strictly increasing if "
362 "resnum_alignments are enabled")
364 for ch
in self.
_target_target.chains:
365 ins_codes = [r.GetNumber().GetInsCode()
for r
in ch.residues]
366 if len(set(ins_codes)) != 1
or ins_codes[0] !=
'\0':
367 raise RuntimeError(
"Residue numbers in each target chain "
368 "must not contain insertion codes if "
369 "resnum_alignments are enabled")
370 nums = [r.GetNumber().GetNum()
for r
in ch.residues]
371 if not all(i < j
for i, j
in zip(nums, nums[1:])):
372 raise RuntimeError(
"Residue numbers in each target chain "
373 "must be strictly increasing if "
374 "resnum_alignments are enabled")
376 if usalign_exec
is not None:
377 if not os.path.exists(usalign_exec):
378 raise RuntimeError(f
"USalign exec ({usalign_exec}) "
380 if not os.access(usalign_exec, os.X_OK):
381 raise RuntimeError(f
"USalign exec ({usalign_exec}) "
382 f
"is not executable")
431 self.
_lddt_lddt =
None
482 self.
_fnat_fnat =
None
486 self.
_nnat_nnat =
None
487 self.
_nmdl_nmdl =
None
518 self.
_rmsd_rmsd =
None
529 if custom_mapping
is not None:
532 if custom_rigid_mapping
is not None:
535 LogDebug(
"Scorer sucessfully initialized")
539 """ Model with Molck cleanup
541 :type: :class:`ost.mol.EntityHandle`
547 """ The original model passed at object construction
549 :type: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
555 """ A selection of :attr:`~model_orig`
557 Only contains peptide and nucleotide residues
559 :type: :class:`ost.mol.EntityView`
562 query =
"peptide=true or nucleotide=true"
568 """ Target with Molck cleanup
570 :type: :class:`ost.mol.EntityHandle`
576 """ The original target passed at object construction
578 :type: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
584 """ A selection of :attr:`~target_orig`
586 Only contains peptide and nucleotide residues
588 :type: :class:`ost.mol.EntityView`
591 query =
"peptide=true or nucleotide=true"
597 """ Alignments of :attr:`~model`/:attr:`~target` chains
599 Alignments for each pair of chains mapped in :attr:`~mapping`.
600 First sequence is target sequence, second sequence the model sequence.
602 :type: :class:`list` of :class:`ost.seq.AlignmentHandle`
604 if self.
_aln_aln
is None:
610 """ Stereochecked equivalent of :attr:`~aln`
612 The alignments may differ, as stereochecks potentially remove residues
614 :type: :class:`list` of :class:`ost.seq.AlignmentHandle`
622 """ Alignments of :attr:`~model_orig`/:attr:`~target_orig` chains
624 Selects for peptide and nucleotide residues before sequence
625 extraction. Includes residues that would be removed by molck in
626 structure preprocessing.
628 :type: :class:`list` of :class:`ost.seq.AlignmentHandle`
636 """ Alignments of :attr:`~trimmed_model`/:attr:`~target` chains
638 Alignments for each pair of chains mapped in :attr:`~mapping`.
639 First sequence is target sequence, second sequence the model sequence.
641 :type: :class:`list` of :class:`ost.seq.AlignmentHandle`
649 """ View of :attr:`~model` that has stereochemistry checks applied
651 First, a selection for peptide/nucleotide residues is performed,
652 secondly peptide sidechains with stereochemical irregularities are
653 removed (full residue if backbone atoms are involved). Irregularities
654 are clashes or bond lengths/angles more than 12 standard deviations
655 from expected values.
657 :type: :class:`ost.mol.EntityView`
665 """ Clashing model atoms
667 :type: :class:`list` of :class:`ost.mol.alg.stereochemistry.ClashInfo`
675 """ Model bonds with unexpected stereochemistry
677 :type: :class:`list` of
678 :class:`ost.mol.alg.stereochemistry.BondViolationInfo`
686 """ Model angles with unexpected stereochemistry
688 :type: :class:`list` of
689 :class:`ost.mol.alg.stereochemistry.AngleViolationInfo`
697 """ Same as :attr:`~stereochecked_model` for :attr:`~target`
699 :type: :class:`ost.mol.EntityView`
707 """ Clashing target atoms
709 :type: :class:`list` of :class:`ost.mol.alg.stereochemistry.ClashInfo`
717 """ Target bonds with unexpected stereochemistry
719 :type: :class:`list` of
720 :class:`ost.mol.alg.stereochemistry.BondViolationInfo`
728 """ Target angles with unexpected stereochemistry
730 :type: :class:`list` of
731 :class:`ost.mol.alg.stereochemistry.AngleViolationInfo`
739 """ :attr:`~model` trimmed to target
741 Removes residues that are not covered by :class:`target` given
742 :attr:`~mapping`. In other words: no model residues without experimental
743 evidence from :class:`target`.
745 :type: :class:`ost.mol.EntityView`
753 """ Chain mapper object for given :attr:`~target`
755 :type: :class:`ost.mol.alg.chain_mapping.ChainMapper`
773 """ Full chain mapping result for :attr:`~target`/:attr:`~model`
775 Computed with :func:`ost.mol.alg.ChainMapper.GetMapping`
777 :type: :class:`ost.mol.alg.chain_mapping.MappingResult`
780 LogScript(
"Computing chain mapping")
788 """ Full chain mapping result for :attr:`~target`/:attr:`~model`
790 Computed with :func:`ost.mol.alg.ChainMapper.GetRMSDMapping`
792 :type: :class:`ost.mol.alg.chain_mapping.MappingResult`
795 LogScript(
"Computing rigid chain mapping")
802 """ Interface residues in :attr:`~model`
804 Thats all residues having a contact with at least one residue from
805 another chain (CB-CB distance <= 8A, CA in case of Glycine)
807 :type: :class:`dict` with chain names as key and and :class:`list`
808 with residue numbers of the respective interface residues.
817 """ Same as :attr:`~model_interface_residues` for :attr:`~target`
819 :type: :class:`dict` with chain names as key and and :class:`list`
820 with residue numbers of the respective interface residues.
829 """ LDDT scorer for :attr:`~target`/:attr:`~stereochecked_target`
831 Depending on :attr:`~lddt_no_stereocheck` and
832 :attr:`~lddt_symmetry_settings`.
834 :type: :class:`ost.mol.alg.lddt.lDDTScorer`
849 """ LDDT scorer for :attr:`~target`, restricted to representative
852 No stereochecks applied for bb only LDDT which considers CA atoms
853 for peptides and C3' atoms for nucleotides.
855 :type: :class:`ost.mol.alg.lddt.lDDTScorer`
865 """ QS scorer constructed from :attr:`~mapping`
867 The scorer object is constructed with default parameters and relates to
868 :attr:`~model` and :attr:`~target` (no stereochecks).
870 :type: :class:`ost.mol.alg.qsscore.QSScorer`
886 self.
mappingmapping.chem_groups,
893 """ Global LDDT score in range [0.0, 1.0]
895 Computed based on :attr:`~stereochecked_model`. In case of oligomers,
896 :attr:`~mapping` is used.
898 :type: :class:`float`
900 if self.
_lddt_lddt
is None:
902 return self.
_lddt_lddt
906 """ Per residue LDDT scores in range [0.0, 1.0]
908 Computed based on :attr:`~stereochecked_model` but scores for all
909 residues in :attr:`~model` are reported. If a residue has been removed
910 by stereochemistry checks, the respective score is set to 0.0. If a
911 residue is not covered by the target or is in a chain skipped by the
912 chain mapping procedure (happens for super short chains), the respective
913 score is set to None. In case of oligomers, :attr:`~mapping` is used.
923 """ Per atom LDDT scores in range [0.0, 1.0]
925 Computed based on :attr:`~stereochecked_model` but scores for all
926 atoms in :attr:`~model` are reported. If an atom has been removed
927 by stereochemistry checks, the respective score is set to 0.0. If an
928 atom is not covered by the target or is in a chain skipped by the
929 chain mapping procedure (happens for super short chains), the respective
930 score is set to None. In case of oligomers, :attr:`~mapping` is used.
940 """ Global LDDT score restricted to representative backbone atoms in
943 Computed based on :attr:`~model` on representative backbone atoms only.
944 This is CA for peptides and C3' for nucleotides. No stereochecks are
945 performed. In case of oligomers, :attr:`~mapping` is used.
947 :type: :class:`float`
955 """ Per residue LDDT scores restricted to representative backbone atoms
958 Computed based on :attr:`~model` on representative backbone atoms only.
959 This is CA for peptides and C3' for nucleotides. No stereochecks are
960 performed. If a residue is not covered by the target or is in a chain
961 skipped by the chain mapping procedure (happens for super short
962 chains), the respective score is set to None. In case of oligomers,
963 :attr:`~mapping` is used.
973 """ Global interface LDDT score in range [0.0, 1.0]
975 This is LDDT only based on inter-chain contacts. Value is None if no
976 such contacts are present. For example if we're dealing with a monomer.
977 Computed based on :attr:`~stereochecked_model` and :attr:`~mapping` for
980 :type: :class:`float`
982 if self.
_ilddt_ilddt
is None:
993 Computed based on :attr:`~model` using :attr:`~mapping`
995 :type: :class:`float`
1003 """ Global QS-score - only computed on aligned residues
1005 Computed based on :attr:`~model` using :attr:`~mapping`. The QS-score
1006 computation only considers contacts between residues with a mapping
1007 between target and model. As a result, the score won't be lowered in
1008 case of additional chains/residues in any of the structures.
1010 :type: :class:`float`
1018 """ Interfaces in :attr:`~target` with non-zero contribution to
1019 :attr:`~qs_global`/:attr:`~qs_best`
1021 Chain names are lexicographically sorted.
1023 :type: :class:`list` of :class:`tuple` with 2 elements each:
1034 """ Interfaces in :attr:`~model` with non-zero contribution to
1035 :attr:`~qs_global`/:attr:`~qs_best`
1037 Chain names are lexicographically sorted.
1039 :type: :class:`list` of :class:`tuple` with 2 elements each:
1051 """ Interfaces in :attr:`~qs_target_interfaces` that can be mapped
1054 Target chain names are lexicographically sorted.
1056 :type: :class:`list` of :class:`tuple` with 4 elements each:
1057 (trg_ch1, trg_ch2, mdl_ch1, mdl_ch2)
1061 flat_mapping = self.
mappingmapping.GetFlatMapping()
1063 if i[0]
in flat_mapping
and i[1]
in flat_mapping:
1066 flat_mapping[i[1]]))
1071 """ QS-score for each interface in :attr:`~qs_interfaces`
1073 :type: :class:`list` of :class:`float`
1081 """ QS-score for each interface in :attr:`~qs_interfaces`
1083 Only computed on aligned residues
1085 :type: :class:`list` of :class:`float`
1095 A contact is a pair or residues from distinct chains that have
1096 a minimal heavy atom distance < 5A. Contacts are specified as
1097 :class:`tuple` with two strings in format:
1098 <cname>.<rnum>.<ins_code>
1100 :type: :class:`list` of :class:`tuple`
1108 """ Same for :attr:`~model`
1116 """ Same for :attr:`~trimmed_model`
1124 """ Interfaces in :class:`target` which have at least one contact
1126 Contact as defined in :attr:`~native_contacts`,
1127 chain names are lexicographically sorted.
1129 :type: :class:`list` of :class:`tuple` with 2 elements each
1134 tmp = [(min(x[0],x[1]), max(x[0],x[1]))
for x
in tmp]
1140 """ Interfaces in :class:`model` which have at least one contact
1142 Contact as defined in :attr:`~native_contacts`,
1143 chain names are lexicographically sorted.
1145 :type: :class:`list` of :class:`tuple` with 2 elements each
1150 tmp = [(min(x[0],x[1]), max(x[0],x[1]))
for x
in tmp]
1156 """ Fraction of model contacts that are also present in target
1158 :type: :class:`float`
1166 """ Fraction of target contacts that are correctly reproduced in model
1168 :type: :class:`float`
1176 """ ICS (Interface Contact Similarity) score
1178 Combination of :attr:`~ics_precision` and :attr:`~ics_recall`
1179 using the F1-measure
1181 :type: :class:`float`
1183 if self.
_ics_ics
is None:
1185 return self.
_ics_ics
1189 """ Per-interface ICS precision
1191 :attr:`~ics_precision` for each interface in
1192 :attr:`~contact_target_interfaces`
1194 :type: :class:`list` of :class:`float`
1203 """ Per-interface ICS recall
1205 :attr:`~ics_recall` for each interface in
1206 :attr:`~contact_target_interfaces`
1208 :type: :class:`list` of :class:`float`
1216 """ Per-interface ICS (Interface Contact Similarity) score
1218 :attr:`~ics` for each interface in
1219 :attr:`~contact_target_interfaces`
1221 :type: :class:`float`
1230 """ Fraction of model interface residues that are also interface
1233 :type: :class:`float`
1241 """ Fraction of target interface residues that are also interface
1244 :type: :class:`float`
1252 """ IPS (Interface Patch Similarity) score
1254 Jaccard coefficient of interface residues in target and their mapped
1255 counterparts in model
1257 :type: :class:`float`
1259 if self.
_ips_ips
is None:
1261 return self.
_ips_ips
1265 """ Same as :attr:`~ics` but with trimmed model
1267 Model is trimmed to residues which can me mapped to target in order
1268 to not penalize contacts in the model for which we have no experimental
1271 :type: :class:`float`
1279 """ Same as :attr:`~ics_precision` but with trimmed model
1281 Model is trimmed to residues which can me mapped to target in order
1282 to not penalize contacts in the model for which we have no experimental
1285 :type: :class:`float`
1293 """ Same as :attr:`~ics_recall` but with trimmed model
1295 Model is trimmed to residues which can me mapped to target in order
1296 to not penalize contacts in the model for which we have no experimental
1299 :type: :class:`float`
1307 """ Same as :attr:`~per_interface_ics_precision` but with :attr:`~trimmed_model`
1309 :attr:`~ics_precision_trimmed` for each interface in
1310 :attr:`~contact_target_interfaces`
1312 :type: :class:`list` of :class:`float`
1321 """ Same as :attr:`~per_interface_ics_recall` but with :attr:`~trimmed_model`
1323 :attr:`~ics_recall_trimmed` for each interface in
1324 :attr:`~contact_target_interfaces`
1326 :type: :class:`list` of :class:`float`
1334 """ Same as :attr:`~per_interface_ics` but with :attr:`~trimmed_model`
1336 :attr:`~ics` for each interface in
1337 :attr:`~contact_target_interfaces`
1339 :type: :class:`float`
1348 """ Same as :attr:`~ips` but with trimmed model
1350 Model is trimmed to residues which can me mapped to target in order
1351 to not penalize contacts in the model for which we have no experimental
1354 :type: :class:`float`
1362 """ Same as :attr:`~ips_precision` but with trimmed model
1364 Model is trimmed to residues which can me mapped to target in order
1365 to not penalize contacts in the model for which we have no experimental
1368 :type: :class:`float`
1376 """ Same as :attr:`~ips_recall` but with trimmed model
1378 Model is trimmed to residues which can me mapped to target in order
1379 to not penalize contacts in the model for which we have no experimental
1382 :type: :class:`float`
1390 """ Per-interface IPS precision
1392 :attr:`~ips_precision` for each interface in
1393 :attr:`~contact_target_interfaces`
1395 :type: :class:`list` of :class:`float`
1403 """ Per-interface IPS recall
1405 :attr:`~ips_recall` for each interface in
1406 :attr:`~contact_target_interfaces`
1408 :type: :class:`list` of :class:`float`
1416 """ Per-interface IPS (Interface Patch Similarity) score
1418 :attr:`~ips` for each interface in
1419 :attr:`~contact_target_interfaces`
1421 :type: :class:`list` of :class:`float`
1430 """ Same as :attr:`~per_interface_ips_precision` but with :attr:`~trimmed_model`
1432 :attr:`~ips_precision_trimmed` for each interface in
1433 :attr:`~contact_target_interfaces`
1435 :type: :class:`list` of :class:`float`
1444 """ Same as :attr:`~per_interface_ips_recall` but with :attr:`~trimmed_model`
1446 :attr:`~ics_recall_trimmed` for each interface in
1447 :attr:`~contact_target_interfaces`
1449 :type: :class:`list` of :class:`float`
1457 """ Same as :attr:`~per_interface_ips` but with :attr:`~trimmed_model`
1459 :attr:`~ics` for each interface in
1460 :attr:`~contact_target_interfaces`
1462 :type: :class:`float`
1471 """ Interfaces in :attr:`~target` that are relevant for DockQ
1473 All interfaces in :attr:`~target` with non-zero contacts that are
1474 relevant for DockQ. Includes protein-protein, protein-nucleotide and
1475 nucleotide-nucleotide interfaces. Chain names for each interface are
1476 lexicographically sorted.
1478 :type: :class:`list` of :class:`tuple` with 2 elements each:
1488 contact_d = contact_d)
1491 interfaces = cent.interacting_chains
1492 interfaces = [(min(x[0],x[1]), max(x[0],x[1]))
for x
in interfaces]
1494 pep_seqs = set([s.GetName()
for s
in self.
chain_mapperchain_mapper.polypep_seqs])
1495 nuc_seqs = set([s.GetName()
for s
in self.
chain_mapperchain_mapper.polynuc_seqs])
1497 seqs = pep_seqs.union(nuc_seqs)
1499 for interface
in interfaces:
1500 if interface[0]
in seqs
and interface[1]
in seqs:
1507 """ Interfaces in :attr:`~dockq_target_interfaces` that can be mapped
1510 Target chain names are lexicographically sorted
1512 :type: :class:`list` of :class:`tuple` with 4 elements each:
1513 (trg_ch1, trg_ch2, mdl_ch1, mdl_ch2)
1517 flat_mapping = self.
mappingmapping.GetFlatMapping()
1519 if i[0]
in flat_mapping
and i[1]
in flat_mapping:
1522 flat_mapping[i[1]]))
1527 """ DockQ scores for interfaces in :attr:`~dockq_interfaces`
1529 :class:`list` of :class:`float`
1537 """ fnat scores for interfaces in :attr:`~dockq_interfaces`
1539 fnat: Fraction of native contacts that are also present in model
1541 :class:`list` of :class:`float`
1543 if self.
_fnat_fnat
is None:
1545 return self.
_fnat_fnat
1549 """ N native contacts for interfaces in :attr:`~dockq_interfaces`
1551 :class:`list` of :class:`int`
1553 if self.
_nnat_nnat
is None:
1555 return self.
_nnat_nnat
1559 """ N model contacts for interfaces in :attr:`~dockq_interfaces`
1561 :class:`list` of :class:`int`
1563 if self.
_nmdl_nmdl
is None:
1565 return self.
_nmdl_nmdl
1569 """ fnonnat scores for interfaces in :attr:`~dockq_interfaces`
1571 fnat: Fraction of model contacts that are not present in target
1573 :class:`list` of :class:`float`
1581 """ irmsd scores for interfaces in :attr:`~dockq_interfaces`
1583 irmsd: RMSD of interface (RMSD computed on backbone atoms) which
1584 consists of each residue that has at least one heavy atom within 10A of
1585 other chain. Backbone atoms for proteins: "CA","C","N","O", for
1586 nucleotides: "P", "OP1", "OP2", "O2'", "O3'", "O4'", "O5'", "C1'",
1587 "C2'", "C3'", "C4'", "C5'".
1589 :class:`list` of :class:`float`
1591 if self.
_irmsd_irmsd
is None:
1597 """ lrmsd scores for interfaces in :attr:`~dockq_interfaces`
1599 lrmsd: The two chains involved in the interface are superposed based on
1600 the receptor (rigid min RMSD superposition) and the ligand RMSD is
1601 reported. Receptor is the chain with more residues. Superposition and
1602 RMSD is computed on same backbone atoms as :attr:`~irmsd`.
1604 :class:`list` of :class:`float`
1606 if self.
_lrmsd_lrmsd
is None:
1612 """ Average of DockQ scores in :attr:`~dockq_scores`
1614 In its original implementation, DockQ only operates on single
1615 interfaces. Thus the requirement to combine scores for higher order
1618 :type: :class:`float`
1626 """ Same as :attr:`~dockq_ave`, weighted by native contacts
1628 :type: :class:`float`
1636 """ Same as :attr:`~dockq_ave` but penalizing for missing interfaces
1638 Interfaces that are not covered in model are added as 0.0
1639 in average computation.
1641 :type: :class:`float`
1649 """ Same as :attr:`~dockq_ave_full`, but weighted
1651 Interfaces that are not covered in model are added as 0.0 in
1652 average computations and the respective weights are derived from
1653 number of contacts in respective target interface.
1661 """ Mapped representative positions in target
1663 Thats CA positions for peptide residues and C3' positions for
1664 nucleotides. Has same length as :attr:`~mapped_model_pos` and mapping
1665 is based on :attr:`~mapping`.
1667 :type: :class:`ost.geom.Vec3List`
1675 """ Mapped representative positions in target
1677 Thats the equivalent of :attr:`~mapped_target_pos` but containing more
1678 backbone atoms (N, CA, C for peptide residues and O5', C5', C4', C3', O3
1679 for nucleotide residues). mapping is based on :attr:`~mapping`.
1681 :type: :class:`ost.geom.Vec3List`
1689 """ Mapped representative positions in model
1691 Thats CA positions for peptide residues and C3' positions for
1692 nucleotides. Has same length as :attr:`~mapped_target_pos` and mapping
1693 is based on :attr:`~mapping`.
1695 :type: :class:`ost.geom.Vec3List`
1703 """ Mapped representative positions in model
1705 Thats the equivalent of :attr:`~mapped_model_pos` but containing more
1706 backbone atoms (N, CA, C for peptide residues and O5', C5', C4', C3', O3
1707 for nucleotide residues). mapping is based on :attr:`~mapping`.
1709 :type: :class:`ost.geom.Vec3List`
1717 """ :attr:`~mapped_model_pos` with :attr:`~transform` applied
1719 :type: :class:`ost.geom.Vec3List`
1729 """ Number of target residues which have no mapping to model
1739 """ Transform: :attr:`~mapped_model_pos` onto :attr:`~mapped_target_pos`
1741 Computed using Kabsch minimal rmsd algorithm. If number of positions
1742 is too small (< 3), :attr:`~mapped_model_pos_full_bb` and
1743 :attr:`~mapped_target_pos_full_bb` are used.
1745 :type: :class:`ost.geom.Mat4`
1752 self.
_transform_transform = res.transformation
1759 self.
_transform_transform = res.transformation
1764 """ Mapped representative positions in target
1766 Thats CA positions for peptide residues and C3' positions for
1767 nucleotides. Has same length as :attr:`~rigid_mapped_model_pos` and mapping
1768 is based on :attr:`~rigid_mapping`.
1770 :type: :class:`ost.geom.Vec3List`
1778 """ Mapped representative positions in target
1780 Thats the equivalent of :attr:`~rigid_mapped_target_pos` but containing
1781 more backbone atoms (N, CA, C for peptide residues and O5', C5', C4',
1782 C3', O3 for nucleotide residues). mapping is based on :attr:`~mapping`.
1784 :type: :class:`ost.geom.Vec3List`
1792 """ Mapped representative positions in model
1794 Thats CA positions for peptide residues and C3' positions for
1795 nucleotides. Has same length as :attr:`~mapped_target_pos` and mapping
1796 is based on :attr:`~rigid_mapping`.
1798 :type: :class:`ost.geom.Vec3List`
1806 """ Mapped representative positions in model
1808 Thats the equivalent of :attr:`~rigid_mapped_model_pos` but containing
1809 more backbone atoms (N, CA, C for peptide residues and O5', C5', C4',
1810 C3', O3 for nucleotide residues). mapping is based on :attr:`~mapping`.
1812 :type: :class:`ost.geom.Vec3List`
1820 """ :attr:`~rigid_mapped_model_pos` with :attr:`~rigid_transform` applied
1822 :type: :class:`ost.geom.Vec3List`
1832 """ Number of target residues which have no rigid mapping to model
1842 """ Transform: :attr:`~rigid_mapped_model_pos` onto :attr:`~rigid_mapped_target_pos`
1844 Computed using Kabsch minimal rmsd algorithm. If number of positions
1845 is too small (< 3), :attr:`~rigid_mapped_model_pos_full_bb` and
1846 :attr:`~rigid_mapped_target_pos_full_bb` are used.
1848 :type: :class:`ost.geom.Mat4`
1867 """ Fraction CA (C3' for nucleotides) that can be superposed within 0.5A
1869 Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
1870 Similar iterative algorithm as LGA tool
1872 :type: :class:`float`
1874 if self.
_gdt_05_gdt_05
is None:
1877 for window_size
in wsizes:
1880 window_size, 1000, 0.5)[0]
1885 self.
_gdt_05_gdt_05 = float(n) / n_full
1892 """ Fraction CA (C3' for nucleotides) that can be superposed within 1.0A
1894 Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
1895 Similar iterative algorithm as LGA tool
1897 :type: :class:`float`
1899 if self.
_gdt_1_gdt_1
is None:
1902 for window_size
in wsizes:
1905 window_size, 1000, 1.0)[0]
1910 self.
_gdt_1_gdt_1 = float(n) / n_full
1917 """ Fraction CA (C3' for nucleotides) that can be superposed within 2.0A
1919 Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
1920 Similar iterative algorithm as LGA tool
1923 :type: :class:`float`
1925 if self.
_gdt_2_gdt_2
is None:
1928 for window_size
in wsizes:
1931 window_size, 1000, 2.0)[0]
1936 self.
_gdt_2_gdt_2 = float(n) / n_full
1943 """ Fraction CA (C3' for nucleotides) that can be superposed within 4.0A
1945 Uses :attr:`~rigid_mapped_model_pos` and :attr:`~rigid_mapped_target_pos`.
1946 Similar iterative algorithm as LGA tool
1948 :type: :class:`float`
1950 if self.
_gdt_4_gdt_4
is None:
1953 for window_size
in wsizes:
1956 window_size, 1000, 4.0)[0]
1961 self.
_gdt_4_gdt_4 = float(n) / n_full
1968 """ Fraction CA (C3' for nucleotides) that can be superposed within 8.0A
1970 Similar iterative algorithm as LGA tool
1972 :type: :class:`float`
1974 if self.
_gdt_8_gdt_8
is None:
1977 for window_size
in wsizes:
1980 window_size, 1000, 8.0)[0]
1985 self.
_gdt_8_gdt_8 = float(n) / n_full
1993 """ avg GDT with thresholds: 8.0A, 4.0A, 2.0A and 1.0A
1995 :type: :class:`float`
1997 if self.
_gdtts_gdtts
is None:
1998 LogScript(
"Computing GDT-TS score")
2004 """ avg GDT with thresholds: 4.0A, 2.0A, 1.0A and 0.5A
2006 :type: :class:`float`
2008 if self.
_gdtha_gdtha
is None:
2009 LogScript(
"Computing GDT-HA score")
2017 Computed on :attr:`~rigid_transformed_mapped_model_pos` and
2018 :attr:`~rigid_mapped_target_pos`
2020 :type: :class:`float`
2022 if self.
_rmsd_rmsd
is None:
2023 LogScript(
"Computing RMSD")
2026 return self.
_rmsd_rmsd
2030 """ The global CAD atom-atom (AA) score
2032 Computed based on :attr:`~model`. In case of oligomers, :attr:`~mapping`
2035 :type: :class:`float`
2043 """ The per-residue CAD atom-atom (AA) scores
2045 Computed based on :attr:`~model`. In case of oligomers, :attr:`~mapping`
2048 :type: :class:`dict`
2056 """ Patch QS-scores for each residue in :attr:`~model_interface_residues`
2058 Representative patches for each residue r in chain c are computed as
2061 * mdl_patch_one: All residues in c with CB (CA for GLY) positions within
2062 8A of r and within 12A of residues from any other chain.
2063 * mdl_patch_two: Closest residue x to r in any other chain gets
2064 identified. Patch is then constructed by selecting all residues from
2065 any other chain within 8A of x and within 12A from any residue in c.
2066 * trg_patch_one: Chain name and residue number based mapping from
2068 * trg_patch_two: Chain name and residue number based mapping from
2071 Results are stored in the same manner as
2072 :attr:`~model_interface_residues`, with corresponding scores instead of
2073 residue numbers. Scores for residues which are not
2074 :class:`mol.ChemType.AMINOACIDS` are set to None. Additionally,
2075 interface patches are derived from :attr:`~model`. If they contain
2076 residues which are not covered by :attr:`~target`, the score is set to
2079 :type: :class:`dict` with chain names as key and and :class:`list`
2080 with scores of the respective interface residues.
2088 """ Same as :attr:`~patch_qs` but for DockQ scores
2096 """ TM-score computed with USalign
2098 USalign executable can be specified with usalign_exec kwarg at Scorer
2099 construction, an OpenStructure internal copy of the USalign code is
2102 :type: :class:`float`
2110 """ Mapping computed with USalign
2112 Dictionary with target chain names as key and model chain names as
2113 values. No guarantee that all chains are mapped. USalign executable
2114 can be specified with usalign_exec kwarg at Scorer construction, an
2115 OpenStructure internal copy of the USalign code is used otherwise.
2117 :type: :class:`dict`
2123 def _aln_helper(self, target, model):
2128 for ch
in target.chains:
2129 cname = ch.GetName()
2130 s =
''.join([r.one_letter_code
for r
in ch.residues])
2131 s = seq.CreateSequence(ch.GetName(), s)
2132 trg_seqs[ch.GetName()] = s
2134 for ch
in model.chains:
2135 cname = ch.GetName()
2136 s =
''.join([r.one_letter_code
for r
in ch.residues])
2137 s = seq.CreateSequence(cname, s)
2138 mdl_seqs[ch.GetName()] = s
2141 trg_pep_chains = [s.GetName()
for s
in self.
chain_mapperchain_mapper.polypep_seqs]
2142 trg_nuc_chains = [s.GetName()
for s
in self.
chain_mapperchain_mapper.polynuc_seqs]
2143 trg_pep_chains = set(trg_pep_chains)
2144 trg_nuc_chains = set(trg_nuc_chains)
2145 for trg_ch, mdl_ch
in self.
mappingmapping.GetFlatMapping().items():
2146 if mdl_ch
in mdl_seqs
and trg_ch
in trg_seqs:
2147 if trg_ch
in trg_pep_chains:
2148 stype = mol.ChemType.AMINOACIDS
2149 elif trg_ch
in trg_nuc_chains:
2150 stype = mol.ChemType.NUCLEOTIDES
2152 raise RuntimeError(
"Chain name inconsistency... ask "
2155 aln = self.
chain_mapperchain_mapper.ResNumAlign(trg_seqs[trg_ch],
2159 aln = self.
chain_mapperchain_mapper.NWAlign(trg_seqs[trg_ch],
2166 def _compute_aln(self):
2169 def _compute_stereochecked_aln(self):
2172 for a
in self.
alnaln:
2173 trg_s = a.GetSequence(0)
2174 mdl_s = a.GetSequence(1)
2175 trg_ch = self.
targettarget.FindChain(trg_s.name)
2176 mdl_ch = self.
modelmodel.FindChain(mdl_s.name)
2178 sc_trg_olc = [
'-'] * len(trg_s)
2179 sc_mdl_olc = [
'-'] * len(mdl_s)
2182 if sc_trg_ch.IsValid():
2185 trg_residues = trg_ch.residues
2187 for olc_idx, olc
in enumerate(trg_s):
2189 r = trg_residues[res_idx]
2190 sc_r = sc_trg_ch.FindResidue(r.GetNumber())
2192 sc_trg_olc[olc_idx] = sc_r.one_letter_code
2196 if sc_mdl_ch.IsValid():
2199 mdl_residues = mdl_ch.residues
2201 for olc_idx, olc
in enumerate(mdl_s):
2203 r = mdl_residues[res_idx]
2204 sc_r = sc_mdl_ch.FindResidue(r.GetNumber())
2206 sc_mdl_olc[olc_idx] = sc_r.one_letter_code
2209 sc_trg_s = seq.CreateSequence(trg_s.name,
''.join(sc_trg_olc))
2210 sc_mdl_s = seq.CreateSequence(mdl_s.name,
''.join(sc_mdl_olc))
2211 new_a = seq.CreateAlignment()
2212 new_a.AddSequence(sc_trg_s)
2213 new_a.AddSequence(sc_mdl_s)
2218 def _compute_pepnuc_aln(self):
2222 def _compute_lddt(self):
2223 LogScript(
"Computing all-atom LDDT")
2225 flat_mapping = self.
mappingmapping.GetFlatMapping(mdl_as_key=
True)
2229 for aln
in self.
alnaln:
2230 mdl_seq = aln.GetSequence(1)
2231 alns[mdl_seq.name] = aln
2236 aa_local_lddt =
None
2239 lddt_chain_mapping = dict()
2240 for mdl_ch, trg_ch
in flat_mapping.items():
2242 lddt_chain_mapping[mdl_ch] = trg_ch
2244 chain_mapping = lddt_chain_mapping,
2245 residue_mapping = alns,
2246 check_resnames=
False,
2247 local_lddt_prop=
"lddt",
2249 set_atom_props=
True)[0]
2251 aa_local_lddt = dict()
2252 for r
in self.
modelmodel.residues:
2254 cname = r.GetChain().GetName()
2255 if cname
not in local_lddt:
2256 local_lddt[cname] = dict()
2257 aa_local_lddt[cname] = dict()
2259 rnum = r.GetNumber()
2260 if rnum
not in aa_local_lddt[cname]:
2261 aa_local_lddt[cname][rnum] = dict()
2263 if r.HasProp(
"lddt"):
2264 score = round(r.GetFloatProp(
"lddt"), 3)
2265 local_lddt[cname][rnum] = score
2269 local_lddt[cname][rnum] =
None
2272 if a.HasProp(
"lddt"):
2273 score = round(a.GetFloatProp(
"lddt"), 3)
2274 aa_local_lddt[cname][rnum][a.GetName()] = score
2279 aa_local_lddt[cname][rnum][a.GetName()] =
None
2290 stereochecked_alns = dict()
2292 mdl_seq = aln.GetSequence(1)
2293 if mdl_seq.GetName()
in mdl_chains:
2294 stereochecked_alns[mdl_seq.name] = aln
2296 lddt_chain_mapping = dict()
2297 for mdl_ch, trg_ch
in flat_mapping.items():
2298 if mdl_ch
in stereochecked_alns:
2299 lddt_chain_mapping[mdl_ch] = trg_ch
2303 chain_mapping = lddt_chain_mapping,
2304 residue_mapping = stereochecked_alns,
2305 check_resnames=
False,
2306 local_lddt_prop=
"lddt",
2308 set_atom_props=
True)[0]
2310 aa_local_lddt = dict()
2311 for r
in self.
modelmodel.residues:
2313 cname = r.GetChain().GetName()
2314 if cname
not in local_lddt:
2315 local_lddt[cname] = dict()
2316 aa_local_lddt[cname] = dict()
2318 rnum = r.GetNumber()
2319 if rnum
not in aa_local_lddt[cname]:
2320 aa_local_lddt[cname][rnum] = dict()
2322 if r.HasProp(
"lddt"):
2323 score = round(r.GetFloatProp(
"lddt"), 3)
2324 local_lddt[cname][rnum] = score
2330 if a.HasProp(
"lddt"):
2331 score = round(a.GetFloatProp(
"lddt"), 3)
2332 aa_local_lddt[cname][rnum][a.GetName()] = score
2345 if cname
in flat_mapping:
2346 for x, y
in _GetAlignedResidues(alns[cname],
2349 if y.number == r.number:
2354 tmp_cname = tmp.GetChain().GetName()
2355 tmp_rnum = tmp.GetNumber()
2371 aa_local_lddt[cname][rnum][a.GetName()] =
None
2372 elif trg_r
is not None and not trg_r.FindAtom(a.GetName()).IsValid():
2374 aa_local_lddt[cname][rnum][a.GetName()] =
None
2375 elif trg_r
is not None and trg_r.FindAtom(a.GetName()).IsValid()
and \
2380 aa_local_lddt[cname][rnum][a.GetName()] = 0.0
2381 elif trg_r
is not None and trg_r.FindAtom(a.GetName()).IsValid()
and \
2382 mdl_r
is not None and not mdl_r.FindAtom(a.GetName()).IsValid():
2384 aa_local_lddt[cname][rnum][a.GetName()] = 0.0
2387 aa_local_lddt[cname][rnum][a.GetName()] =
None
2391 if mdl_res.IsValid():
2394 local_lddt[cname][rnum] =
None
2396 aa_local_lddt[cname][rnum][a.GetName()] =
None
2404 if cname
in flat_mapping:
2406 for x, y
in _GetAlignedResidues(alns[cname],
2409 if y.number == r.number:
2414 tmp_cname = tmp.GetChain().GetName()
2415 tmp_rnum = tmp.GetNumber()
2423 local_lddt[cname][rnum] =
None
2425 aa_local_lddt[cname][rnum][a.GetName()] =
None
2427 local_lddt[cname][rnum] = 0.0
2429 if trg_r.FindAtom(a.GetName()).IsValid():
2430 aa_local_lddt[cname][rnum][a.GetName()] = 0.0
2432 aa_local_lddt[cname][rnum][a.GetName()] =
None
2434 self.
_lddt_lddt = lddt_score
2438 def _compute_bb_lddt(self):
2439 LogScript(
"Computing backbone LDDT")
2442 for aln
in self.
alnaln:
2443 mdl_seq = aln.GetSequence(1)
2444 alns[mdl_seq.name] = aln
2447 flat_mapping = self.
mappingmapping.GetFlatMapping(mdl_as_key=
True)
2448 lddt_chain_mapping = dict()
2449 for mdl_ch, trg_ch
in flat_mapping.items():
2451 lddt_chain_mapping[mdl_ch] = trg_ch
2454 chain_mapping = lddt_chain_mapping,
2455 residue_mapping = alns,
2456 check_resnames=
False,
2457 local_lddt_prop=
"bb_lddt",
2460 for r
in self.
modelmodel.residues:
2461 cname = r.GetChain().GetName()
2462 if cname
not in local_lddt:
2463 local_lddt[cname] = dict()
2464 if r.HasProp(
"bb_lddt"):
2465 score = round(r.GetFloatProp(
"bb_lddt"), 3)
2466 local_lddt[cname][r.GetNumber()] = score
2470 local_lddt[cname][r.GetNumber()] =
None
2475 def _compute_ilddt(self):
2476 LogScript(
"Computing all-atom iLDDT")
2478 flat_mapping = self.
mappingmapping.GetFlatMapping(mdl_as_key=
True)
2482 for aln
in self.
alnaln:
2483 mdl_seq = aln.GetSequence(1)
2484 alns[mdl_seq.name] = aln
2485 lddt_chain_mapping = dict()
2486 for mdl_ch, trg_ch
in flat_mapping.items():
2488 lddt_chain_mapping[mdl_ch] = trg_ch
2490 chain_mapping = lddt_chain_mapping,
2491 residue_mapping = alns,
2492 check_resnames=
False,
2493 local_lddt_prop=
"lddt",
2495 no_intrachain=
True)[0]
2505 stereochecked_alns = dict()
2507 mdl_seq = aln.GetSequence(1)
2508 if mdl_seq.GetName()
in mdl_chains:
2509 stereochecked_alns[mdl_seq.name] = aln
2511 lddt_chain_mapping = dict()
2512 for mdl_ch, trg_ch
in flat_mapping.items():
2513 if mdl_ch
in stereochecked_alns:
2514 lddt_chain_mapping[mdl_ch] = trg_ch
2517 chain_mapping = lddt_chain_mapping,
2518 residue_mapping = stereochecked_alns,
2519 check_resnames=
False,
2520 local_lddt_prop=
"lddt",
2522 no_intrachain=
True)[0]
2525 def _compute_qs(self):
2526 LogScript(
"Computing global QS-score")
2527 qs_score_result = self.
qs_scorerqs_scorer.Score(self.
mappingmapping.mapping)
2528 self.
_qs_global_qs_global = qs_score_result.QS_global
2529 self.
_qs_best_qs_best = qs_score_result.QS_best
2531 def _compute_per_interface_qs_scores(self):
2532 LogScript(
"Computing per-interface QS-score")
2537 trg_ch1 = interface[0]
2538 trg_ch2 = interface[1]
2539 mdl_ch1 = interface[2]
2540 mdl_ch2 = interface[3]
2541 qs_res = self.
qs_scorerqs_scorer.ScoreInterface(trg_ch1, trg_ch2,
2546 def _compute_ics_scores(self):
2547 LogScript(
"Computing ICS scores")
2550 self.
_ics_recall_ics_recall = contact_scorer_res.recall
2551 self.
_ics_ics = contact_scorer_res.ics
2555 flat_mapping = self.
mappingmapping.GetFlatMapping()
2557 trg_ch1 = trg_int[0]
2558 trg_ch2 = trg_int[1]
2559 if trg_ch1
in flat_mapping
and trg_ch2
in flat_mapping:
2560 mdl_ch1 = flat_mapping[trg_ch1]
2561 mdl_ch2 = flat_mapping[trg_ch2]
2562 res = self.
contact_scorercontact_scorer.ScoreICSInterface(trg_ch1, trg_ch2,
2572 def _trim_model(self):
2573 trimmed_mdl = mol.CreateEntityFromView(self.
mappingmapping.model,
True)
2574 trimmed_aln = dict()
2576 for trg_cname, mdl_cname
in self.
mappingmapping.GetFlatMapping().items():
2577 mdl_ch = trimmed_mdl.FindChain(mdl_cname)
2578 aln = self.
mappingmapping.alns[(trg_cname, mdl_cname)]
2581 assert(mdl_ch.GetResidueCount() == \
2582 len(aln.GetSequence(1).GetGaplessString()))
2584 mdl_residues = mdl_ch.residues
2586 aligned_mdl_seq = [
'-'] * aln.GetLength()
2587 for col_idx, col
in enumerate(aln):
2588 if col[0] !=
'-' and col[1] !=
'-':
2589 mdl_res = mdl_residues[mdl_res_idx]
2590 mdl_res.SetIntProp(
"aligned", 1)
2591 aligned_mdl_seq[col_idx] = col[1]
2594 aligned_mdl_seq =
''.join(aligned_mdl_seq)
2595 aligned_mdl_seq = seq.CreateSequence(mdl_cname, aligned_mdl_seq)
2597 new_aln = seq.CreateAlignment()
2598 new_aln.AddSequence(aln.GetSequence(0))
2599 new_aln.AddSequence(aligned_mdl_seq)
2600 trimmed_aln[(trg_cname, mdl_cname)] = new_aln
2602 self.
_trimmed_model_trimmed_model = trimmed_mdl.Select(
"graligned:0=1")
2605 def _compute_ics_scores_trimmed(self):
2606 LogScript(
"Computing ICS scores trimmed")
2620 flat_mapping = self.
mappingmapping.GetFlatMapping()
2622 trg_ch1 = trg_int[0]
2623 trg_ch2 = trg_int[1]
2624 if trg_ch1
in flat_mapping
and trg_ch2
in flat_mapping:
2625 mdl_ch1 = flat_mapping[trg_ch1]
2626 mdl_ch2 = flat_mapping[trg_ch2]
2637 def _compute_ips_scores(self):
2638 LogScript(
"Computing IPS scores")
2641 self.
_ips_recall_ips_recall = contact_scorer_res.recall
2642 self.
_ips_ips = contact_scorer_res.ips
2647 flat_mapping = self.
mappingmapping.GetFlatMapping()
2649 trg_ch1 = trg_int[0]
2650 trg_ch2 = trg_int[1]
2651 if trg_ch1
in flat_mapping
and trg_ch2
in flat_mapping:
2652 mdl_ch1 = flat_mapping[trg_ch1]
2653 mdl_ch2 = flat_mapping[trg_ch2]
2654 res = self.
contact_scorercontact_scorer.ScoreIPSInterface(trg_ch1, trg_ch2,
2664 def _compute_ips_scores_trimmed(self):
2665 LogScript(
"Computing IPS scores trimmed")
2678 flat_mapping = self.
mappingmapping.GetFlatMapping()
2680 trg_ch1 = trg_int[0]
2681 trg_ch2 = trg_int[1]
2682 if trg_ch1
in flat_mapping
and trg_ch2
in flat_mapping:
2683 mdl_ch1 = flat_mapping[trg_ch1]
2684 mdl_ch2 = flat_mapping[trg_ch2]
2695 def _compute_dockq_scores(self):
2696 LogScript(
"Computing DockQ")
2699 raise RuntimeError(
"Cannot compute DockQ for reference structures "
2700 "with nucleotide chains if dockq_capri_peptide "
2705 self.
_fnat_fnat = list()
2707 self.
_irmsd_irmsd = list()
2708 self.
_lrmsd_lrmsd = list()
2709 self.
_nnat_nnat = list()
2710 self.
_nmdl_nmdl = list()
2713 for aln
in self.
alnaln:
2714 trg_s = aln.GetSequence(0)
2715 mdl_s = aln.GetSequence(1)
2716 dockq_alns[(trg_s.GetName(), mdl_s.GetName())] = aln
2719 trg_ch1 = interface[0]
2720 trg_ch2 = interface[1]
2721 mdl_ch1 = interface[2]
2722 mdl_ch2 = interface[3]
2723 aln1 = dockq_alns[(trg_ch1, mdl_ch1)]
2724 aln2 = dockq_alns[(trg_ch2, mdl_ch2)]
2726 res = dockq.DockQ(self.
modelmodel, self.
targettarget, mdl_ch1, mdl_ch2,
2727 trg_ch1, trg_ch2, ch1_aln=aln1,
2728 ch2_aln=aln2, contact_dist_thresh = 4.0,
2729 interface_dist_thresh=8.0,
2730 interface_cb =
True)
2732 res = dockq.DockQ(self.
modelmodel, self.
targettarget, mdl_ch1, mdl_ch2,
2733 trg_ch1, trg_ch2, ch1_aln=aln1,
2736 self.
_fnat_fnat.append(res[
"fnat"])
2737 self.
_fnonnat_fnonnat.append(res[
"fnonnat"])
2738 self.
_irmsd_irmsd.append(res[
"irmsd"])
2739 self.
_lrmsd_lrmsd.append(res[
"lrmsd"])
2741 self.
_nnat_nnat.append(res[
"nnat"])
2742 self.
_nmdl_nmdl.append(res[
"nmdl"])
2747 not_covered_counts = list()
2748 proc_trg_interfaces = set([(x[0], x[1])
for x
in self.
dockq_interfacesdockq_interfaces])
2750 if interface
not in proc_trg_interfaces:
2754 trg_ch1 = interface[0]
2755 trg_ch2 = interface[1]
2758 res = dockq.DockQ(self.
targettarget, self.
targettarget,
2759 trg_ch1, trg_ch2, trg_ch1, trg_ch2,
2760 contact_dist_thresh = 4.0,
2761 interface_dist_thresh=8.0,
2762 interface_cb =
True)
2764 res = dockq.DockQ(self.
targettarget, self.
targettarget,
2765 trg_ch1, trg_ch2, trg_ch1, trg_ch2)
2767 not_covered_counts.append(res[
"nnat"])
2774 weights = np.array(self.
_nnat_nnat)
2779 self.
_dockq_wave_dockq_wave = np.sum(np.multiply(weights/np.sum(weights), scores))
2780 scores = np.append(scores, [0.0]*len(not_covered_counts))
2781 weights = np.append(weights, not_covered_counts)
2786 self.
_dockq_wave_full_dockq_wave_full = np.sum(np.multiply(weights/np.sum(weights),
2789 def _extract_mapped_pos(self):
2793 processed_trg_chains = set()
2794 for trg_ch, mdl_ch
in self.
mappingmapping.GetFlatMapping().items():
2795 processed_trg_chains.add(trg_ch)
2796 aln = self.
mappingmapping.alns[(trg_ch, mdl_ch)]
2798 for trg_res, mdl_res
in _GetAlignedResidues(aln,
2801 trg_at = trg_res.FindAtom(
"CA")
2802 mdl_at = mdl_res.FindAtom(
"CA")
2803 if not trg_at.IsValid():
2804 trg_at = trg_res.FindAtom(
"C3'")
2805 if not mdl_at.IsValid():
2806 mdl_at = mdl_res.FindAtom(
"C3'")
2810 self.
_n_target_not_mapped_n_target_not_mapped += (len(aln.GetSequence(0).GetGaplessString())-n_mapped)
2812 for ch
in self.
mappingmapping.target.chains:
2813 if ch.GetName()
not in processed_trg_chains:
2816 def _extract_mapped_pos_full_bb(self):
2819 exp_pep_atoms = [
"N",
"CA",
"C"]
2820 exp_nuc_atoms = [
"O5'",
"C5'",
"C4'",
"C3'",
"O3'"]
2821 trg_pep_chains = [s.GetName()
for s
in self.
chain_mapperchain_mapper.polypep_seqs]
2822 trg_nuc_chains = [s.GetName()
for s
in self.
chain_mapperchain_mapper.polynuc_seqs]
2823 for trg_ch, mdl_ch
in self.
mappingmapping.GetFlatMapping().items():
2824 aln = self.
mappingmapping.alns[(trg_ch, mdl_ch)]
2825 trg_ch = aln.GetSequence(0).GetName()
2826 if trg_ch
in trg_pep_chains:
2827 exp_atoms = exp_pep_atoms
2828 elif trg_ch
in trg_nuc_chains:
2829 exp_atoms = exp_nuc_atoms
2832 raise RuntimeError(
"Unexpected error - contact OST developer")
2833 for trg_res, mdl_res
in _GetAlignedResidues(aln,
2836 for aname
in exp_atoms:
2837 trg_at = trg_res.FindAtom(aname)
2838 mdl_at = mdl_res.FindAtom(aname)
2839 if not (trg_at.IsValid()
and mdl_at.IsValid()):
2841 raise RuntimeError(
"Unexpected error - contact OST "
2847 def _extract_rigid_mapped_pos(self):
2851 processed_trg_chains = set()
2852 for trg_ch, mdl_ch
in self.
rigid_mappingrigid_mapping.GetFlatMapping().items():
2853 processed_trg_chains.add(trg_ch)
2854 aln = self.
rigid_mappingrigid_mapping.alns[(trg_ch, mdl_ch)]
2856 for trg_res, mdl_res
in _GetAlignedResidues(aln,
2859 trg_at = trg_res.FindAtom(
"CA")
2860 mdl_at = mdl_res.FindAtom(
"CA")
2861 if not trg_at.IsValid():
2862 trg_at = trg_res.FindAtom(
"C3'")
2863 if not mdl_at.IsValid():
2864 mdl_at = mdl_res.FindAtom(
"C3'")
2872 if ch.GetName()
not in processed_trg_chains:
2875 def _extract_rigid_mapped_pos_full_bb(self):
2878 exp_pep_atoms = [
"N",
"CA",
"C"]
2879 exp_nuc_atoms = [
"O5'",
"C5'",
"C4'",
"C3'",
"O3'"]
2880 trg_pep_chains = [s.GetName()
for s
in self.
chain_mapperchain_mapper.polypep_seqs]
2881 trg_nuc_chains = [s.GetName()
for s
in self.
chain_mapperchain_mapper.polynuc_seqs]
2882 for trg_ch, mdl_ch
in self.
rigid_mappingrigid_mapping.GetFlatMapping().items():
2883 aln = self.
rigid_mappingrigid_mapping.alns[(trg_ch, mdl_ch)]
2884 trg_ch = aln.GetSequence(0).GetName()
2885 if trg_ch
in trg_pep_chains:
2886 exp_atoms = exp_pep_atoms
2887 elif trg_ch
in trg_nuc_chains:
2888 exp_atoms = exp_nuc_atoms
2891 raise RuntimeError(
"Unexpected error - contact OST developer")
2892 for trg_res, mdl_res
in _GetAlignedResidues(aln,
2895 for aname
in exp_atoms:
2896 trg_at = trg_res.FindAtom(aname)
2897 mdl_at = mdl_res.FindAtom(aname)
2898 if not (trg_at.IsValid()
and mdl_at.IsValid()):
2900 raise RuntimeError(
"Unexpected error - contact OST "
2905 def _compute_cad_score(self):
2907 raise RuntimeError(
"CAD score computations rely on residue numbers "
2908 "that are consistent between target and model "
2909 "chains, i.e. only work if resnum_alignments "
2910 "is True at Scorer construction.")
2912 LogScript(
"Computing CAD score")
2914 settings.Locate(
"voronota-cadscore",
2916 except Exception
as e:
2917 raise RuntimeError(
"voronota-cadscore must be in PATH for CAD "
2918 "score scoring")
from e
2919 cad_bin_dir = os.path.dirname(cad_score_exec)
2920 m = self.
mappingmapping.GetFlatMapping(mdl_as_key=
True)
2921 cad_result = cadscore.CADScore(self.
modelmodel, self.
targettarget,
2925 cad_bin_path=cad_bin_dir,
2929 for r
in self.
modelmodel.residues:
2930 cname = r.GetChain().GetName()
2931 if cname
not in local_cad:
2932 local_cad[cname] = dict()
2933 if r.HasProp(
"localcad"):
2934 score = round(r.GetFloatProp(
"localcad"), 3)
2935 local_cad[cname][r.GetNumber()] = score
2937 local_cad[cname][r.GetNumber()] =
None
2939 self.
_cad_score_cad_score = cad_result.globalAA
2942 def _get_repr_view(self, ent):
2943 """ Returns view with representative peptide atoms => CB, CA for GLY
2945 Ensures that each residue has exactly one atom with assertions
2947 :param ent: Entity for which you want the representative view
2948 :param ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
2949 :returns: :class:`ost.mol.EntityView` derived from ent
2951 repr_ent = ent.Select(
"(aname=\"CB\" or (rname=\"GLY\" and aname=\"CA\"))")
2952 for r
in repr_ent.residues:
2953 assert(len(r.atoms) == 1)
2956 def _get_interface_residues(self, ent):
2957 """ Get interface residues
2959 Thats all residues having a contact with at least one residue from another
2960 chain (CB-CB distance <= 8A, CA in case of Glycine)
2962 :param ent: Model for which to extract interface residues
2963 :type ent: :class:`ost.mol.EntityView`
2964 :returns: :class:`dict` with chain names as key and and :class:`list`
2965 with residue numbers of the respective interface residues.
2969 result = {ch.GetName(): list()
for ch
in ent.chains}
2970 for ch
in ent.chains:
2971 cname = ch.GetName()
2972 sel = repr_ent.Select(f
"(cname={mol.QueryQuoteName(cname)} and 8 <> [cname!={mol.QueryQuoteName(cname)}])")
2973 result[cname] = [r.GetNumber()
for r
in sel.residues]
2976 def _do_stereochecks(self):
2977 """ Perform stereochemistry checks on model and target
2979 LogInfo(
"Performing stereochemistry checks on model and target")
2980 data = stereochemistry.GetDefaultStereoData()
2981 l_data = stereochemistry.GetDefaultStereoLinkData()
2983 a, b, c, d = stereochemistry.StereoCheck(self.
modelmodel, stereo_data = data,
2984 stereo_link_data = l_data)
2990 a, b, c, d = stereochemistry.StereoCheck(self.
targettarget, stereo_data = data,
2991 stereo_link_data = l_data)
2998 def _get_interface_patches(self, mdl_ch, mdl_rnum):
2999 """ Select interface patches representative for specified residue
3001 The patches for specified residue r in chain c are selected as follows:
3003 * mdl_patch_one: All residues in c with CB (CA for GLY) positions within 8A
3004 of r and within 12A of residues from any other chain.
3005 * mdl_patch_two: Closest residue x to r in any other chain gets identified.
3006 Patch is then constructed by selecting all residues from any other chain
3007 within 8A of x and within 12A from any residue in c.
3008 * trg_patch_one: Chain name and residue number based mapping from
3010 * trg_patch_two: Chain name and residue number based mapping from
3013 :param mdl_ch: Name of chain in *self.model* of residue of interest
3014 :type mdl_ch: :class:`str`
3015 :param mdl_rnum: Residue number of residue of interest
3016 :type mdl_rnum: :class:`ost.mol.ResNum`
3017 :returns: Tuple with 5 elements: 1) :class:`bool` flag whether all residues
3018 in *mdl* patches are covered in *trg* 2) mtl_patch_one
3019 3) mdl_patch_two 4) trg_patch_one 5) trg_patch_two
3025 r = self.
modelmodel.FindResidue(mdl_ch, mdl_rnum)
3027 raise RuntimeError(f
"Cannot find residue {mdl_rnum} in chain {mdl_ch}")
3028 if r.GetName() ==
"GLY":
3029 at = r.FindAtom(
"CA")
3031 at = r.FindAtom(
"CB")
3032 if not at.IsValid():
3033 raise RuntimeError(
"Cannot find interface views for res without CB/CA")
3040 q1 = f
"(cname={mol.QueryQuoteName(mdl_ch)} and 8 <> {{{r_pos[0]},{r_pos[1]},{r_pos[2]}}})"
3042 q2 = f
"(12 <> [cname!={mol.QueryQuoteName(mdl_ch)}])"
3043 mdl_patch_one = self.
modelmodel.CreateEmptyView()
3044 sel = repr_mdl.Select(
" and ".join([q1, q2]))
3045 for r
in sel.residues:
3046 mdl_r = self.
modelmodel.FindResidue(r.GetChain().GetName(), r.GetNumber())
3047 mdl_patch_one.AddResidue(mdl_r, mol.ViewAddFlag.INCLUDE_ALL)
3053 sel = repr_mdl.Select(f
"(cname!={mol.QueryQuoteName(mdl_ch)})")
3054 close_stuff = sel.FindWithin(r_pos, 8)
3057 for close_at
in close_stuff:
3060 min_pos = close_at.GetPos()
3064 q1 = f
"(cname!={mol.QueryQuoteName(mdl_ch)} and 8 <> {{{min_pos[0]},{min_pos[1]},{min_pos[2]}}})"
3066 q2 = f
"(12 <> [cname={mol.QueryQuoteName(mdl_ch)}])"
3067 mdl_patch_two = self.
modelmodel.CreateEmptyView()
3068 sel = repr_mdl.Select(
" and ".join([q1, q2]))
3069 for r
in sel.residues:
3070 mdl_r = self.
modelmodel.FindResidue(r.GetChain().GetName(), r.GetNumber())
3071 mdl_patch_two.AddResidue(mdl_r, mol.ViewAddFlag.INCLUDE_ALL)
3074 flat_mapping = self.
mappingmapping.GetFlatMapping(mdl_as_key=
True)
3075 full_trg_coverage =
True
3076 trg_patch_one = self.
mappingmapping.target.CreateEmptyView()
3077 for r
in mdl_patch_one.residues:
3079 mdl_cname = r.GetChain().GetName()
3080 if mdl_cname
in flat_mapping:
3081 aln = self.
mappingmapping.alns[(flat_mapping[mdl_cname], mdl_cname)]
3082 for x,y
in _GetAlignedResidues(aln,
3085 if y.GetNumber() == r.GetNumber():
3089 if trg_r
is not None:
3090 trg_patch_one.AddResidue(trg_r.handle,
3091 mol.ViewAddFlag.INCLUDE_ALL)
3093 full_trg_coverage =
False
3095 trg_patch_two = self.
mappingmapping.target.CreateEmptyView()
3096 for r
in mdl_patch_two.residues:
3098 mdl_cname = r.GetChain().GetName()
3099 if mdl_cname
in flat_mapping:
3100 aln = self.
mappingmapping.alns[(flat_mapping[mdl_cname], mdl_cname)]
3101 for x,y
in _GetAlignedResidues(aln,
3104 if y.GetNumber() == r.GetNumber():
3108 if trg_r
is not None:
3109 trg_patch_two.AddResidue(trg_r.handle,
3110 mol.ViewAddFlag.INCLUDE_ALL)
3112 full_trg_coverage =
False
3114 return (full_trg_coverage, mdl_patch_one, mdl_patch_two,
3115 trg_patch_one, trg_patch_two)
3117 def _compute_patchqs_scores(self):
3118 LogScript(
"Computing patch QS-scores")
3124 r = self.
modelmodel.FindResidue(cname, rnum)
3125 if r.IsValid()
and r.GetChemType() == mol.ChemType.AMINOACIDS:
3126 full_trg_coverage, mdl_patch_one, mdl_patch_two, \
3127 trg_patch_one, trg_patch_two = \
3129 if full_trg_coverage:
3130 score = self.
_patchqs_patchqs(mdl_patch_one, mdl_patch_two,
3131 trg_patch_one, trg_patch_two)
3132 scores.append(score)
3135 def _compute_patchdockq_scores(self):
3136 LogScript(
"Computing patch DockQ scores")
3142 r = self.
modelmodel.FindResidue(cname, rnum)
3143 if r.IsValid()
and r.GetChemType() == mol.ChemType.AMINOACIDS:
3144 full_trg_coverage, mdl_patch_one, mdl_patch_two, \
3145 trg_patch_one, trg_patch_two = \
3147 if full_trg_coverage:
3148 score = self.
_patchdockq_patchdockq(mdl_patch_one, mdl_patch_two,
3149 trg_patch_one, trg_patch_two)
3150 scores.append(score)
3153 def _patchqs(self, mdl_patch_one, mdl_patch_two, trg_patch_one, trg_patch_two):
3154 """ Score interface residue patches with QS-score
3156 In detail: Construct two entities with two chains each. First chain
3157 consists of residues from <x>_patch_one and second chain consists of
3158 <x>_patch_two. The returned score is the QS-score between the two
3161 :param mdl_patch_one: Interface patch representing scored residue
3162 :type mdl_patch_one: :class:`ost.mol.EntityView`
3163 :param mdl_patch_two: Interface patch representing scored residue
3164 :type mdl_patch_two: :class:`ost.mol.EntityView`
3165 :param trg_patch_one: Interface patch representing scored residue
3166 :type trg_patch_one: :class:`ost.mol.EntityView`
3167 :param trg_patch_two: Interface patch representing scored residue
3168 :type trg_patch_two: :class:`ost.mol.EntityView`
3169 :returns: PatchQS score
3174 alnA = seq.CreateAlignment()
3175 s =
''.join([r.one_letter_code
for r
in mdl_patch_one.residues])
3176 alnA.AddSequence(seq.CreateSequence(
"A", s))
3177 s =
''.join([r.one_letter_code
for r
in trg_patch_one.residues])
3178 alnA.AddSequence(seq.CreateSequence(
"A", s))
3180 alnB = seq.CreateAlignment()
3181 s =
''.join([r.one_letter_code
for r
in mdl_patch_two.residues])
3182 alnB.AddSequence(seq.CreateSequence(
"B", s))
3183 s =
''.join([r.one_letter_code
for r
in trg_patch_two.residues])
3184 alnB.AddSequence(seq.CreateSequence(
"B", s))
3185 alns = {(
"A",
"A"): alnA, (
"B",
"B"): alnB}
3187 scorer =
QSScorer(qs_ent_mdl, [[
"A"], [
"B"]], qs_ent_trg, alns)
3188 score_result = scorer.Score([[
"A"], [
"B"]])
3190 return score_result.QS_global
3192 def _patchdockq(self, mdl_patch_one, mdl_patch_two, trg_patch_one,
3194 """ Score interface residue patches with DockQ
3196 In detail: Construct two entities with two chains each. First chain
3197 consists of residues from <x>_patch_one and second chain consists of
3198 <x>_patch_two. The returned score is the QS-score between the two
3201 :param mdl_patch_one: Interface patch representing scored residue
3202 :type mdl_patch_one: :class:`ost.mol.EntityView`
3203 :param mdl_patch_two: Interface patch representing scored residue
3204 :type mdl_patch_two: :class:`ost.mol.EntityView`
3205 :param trg_patch_one: Interface patch representing scored residue
3206 :type trg_patch_one: :class:`ost.mol.EntityView`
3207 :param trg_patch_two: Interface patch representing scored residue
3208 :type trg_patch_two: :class:`ost.mol.EntityView`
3209 :returns: DockQ score
3213 dockq_result = dockq.DockQ(t, m,
"A",
"B",
"A",
"B")
3214 if dockq_result[
"nnat"] > 0:
3215 return dockq_result[
"DockQ"]
3218 def _qs_ent_from_patches(self, patch_one, patch_two):
3219 """ Constructs Entity with two chains named "A" and "B""
3221 Blindly adds all residues from *patch_one* to chain A and residues from
3222 patch_two to chain B.
3224 ent = mol.CreateEntity()
3226 added_ch = ed.InsertChain(
"A")
3227 for r
in patch_one.residues:
3228 added_r = ed.AppendResidue(added_ch, r.GetName())
3229 added_r.SetChemClass(str(r.GetChemClass()))
3231 ed.InsertAtom(added_r, a.handle)
3232 added_ch = ed.InsertChain(
"B")
3233 for r
in patch_two.residues:
3234 added_r = ed.AppendResidue(added_ch, r.GetName())
3235 added_r.SetChemClass(str(r.GetChemClass()))
3237 ed.InsertAtom(added_r, a.handle)
3240 def _construct_custom_mapping(self, mapping):
3241 """ constructs a full blown MappingResult object from simple dict
3243 :param mapping: mapping with trg chains as key and mdl ch as values
3244 :type mapping: :class:`dict`
3246 LogInfo(
"Setting custom chain mapping")
3249 chem_mapping, chem_group_alns, mdl_chains_without_chem_mapping, mdl = \
3250 chain_mapper.GetChemMapping(self.
modelmodel)
3255 if len(mapping) != len(set(mapping.keys())):
3256 raise RuntimeError(f
"Expect unique trg chain names in mapping. Got "
3257 f
"{mapping.keys()}")
3258 if len(mapping) != len(set(mapping.values())):
3259 raise RuntimeError(f
"Expect unique mdl chain names in mapping. Got "
3260 f
"{mapping.values()}")
3262 trg_chains = set([ch.GetName()
for ch
in chain_mapper.target.chains])
3263 mdl_chains = set([ch.GetName()
for ch
in mdl.chains])
3264 for k,v
in mapping.items():
3265 if k
not in trg_chains:
3266 raise RuntimeError(f
"Target chain \"{k}\" is not available "
3267 f
"in target processed for chain mapping "
3269 if v
not in mdl_chains:
3270 raise RuntimeError(f
"Model chain \"{v}\" is not available "
3271 f
"in model processed for chain mapping "
3274 for trg_ch, mdl_ch
in mapping.items():
3275 trg_group_idx =
None
3276 mdl_group_idx =
None
3277 for idx, group
in enumerate(chain_mapper.chem_groups):
3281 for idx, group
in enumerate(chem_mapping):
3285 if trg_group_idx
is None or mdl_group_idx
is None:
3286 raise RuntimeError(
"Could not establish a valid chem grouping "
3287 "of chain names provided in custom mapping.")
3289 if trg_group_idx != mdl_group_idx:
3290 raise RuntimeError(f
"Chem group mismatch in custom mapping: "
3291 f
"target chain \"{trg_ch}\" groups with the "
3292 f
"following chemically equivalent target "
3294 f
"{chain_mapper.chem_groups[trg_group_idx]} "
3295 f
"but model chain \"{mdl_ch}\" maps to the "
3296 f
"following target chains: "
3297 f
"{chain_mapper.chem_groups[mdl_group_idx]}")
3299 pairs = set([(trg_ch, mdl_ch)
for trg_ch, mdl_ch
in mapping.items()])
3301 chain_mapping._GetRefMdlAlns(chain_mapper.chem_groups,
3302 chain_mapper.chem_group_alignments,
3308 final_mapping = list()
3309 for ref_chains
in chain_mapper.chem_groups:
3310 mapped_mdl_chains = list()
3311 for ref_ch
in ref_chains:
3312 if ref_ch
in mapping:
3313 mapped_mdl_chains.append(mapping[ref_ch])
3315 mapped_mdl_chains.append(
None)
3316 final_mapping.append(mapped_mdl_chains)
3319 for ref_group, mdl_group
in zip(chain_mapper.chem_groups,
3321 for ref_ch, mdl_ch
in zip(ref_group, mdl_group):
3322 if ref_ch
is not None and mdl_ch
is not None:
3323 aln = ref_mdl_alns[(ref_ch, mdl_ch)]
3324 trg_view = chain_mapper.target.Select(f
"cname={mol.QueryQuoteName(ref_ch)}")
3325 mdl_view = mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch)}")
3326 aln.AttachView(0, trg_view)
3327 aln.AttachView(1, mdl_view)
3328 alns[(ref_ch, mdl_ch)] = aln
3331 chain_mapper.chem_groups,
3333 mdl_chains_without_chem_mapping,
3334 final_mapping, alns)
3336 def _compute_tmscore(self):
3339 LogScript(
"Computing TM-score with built-in USalign")
3341 flat_mapping = self.
rigid_mappingrigid_mapping.GetFlatMapping()
3342 LogInfo(
"Overriding TM-score chain mapping")
3343 res = res = bindings.WrappedMMAlign(self.
modelmodel, self.
targettarget,
3344 mapping=flat_mapping)
3346 res = bindings.WrappedMMAlign(self.
modelmodel, self.
targettarget)
3348 LogScript(
"Computing TM-score with USalign exectuable")
3350 LogInfo(
"Overriding TM-score chain mapping")
3351 flat_mapping = self.
rigid_mappingrigid_mapping.GetFlatMapping()
3352 res = tmtools.USAlign(self.
modelmodel, self.
targettarget,
3354 custom_chain_mapping = flat_mapping)
3356 res = tmtools.USAlign(self.
modelmodel, self.
targettarget,
3361 for a,b
in zip(res.ent1_mapped_chains, res.ent2_mapped_chains):
3364 def _resnum_connect(self, ent):
3366 for ch
in ent.chains:
3367 res_list = ch.residues
3368 for i
in range(len(res_list) - 1):
3371 if ra.GetNumber().GetNum() + 1 == rb.GetNumber().GetNum():
3372 if ra.IsPeptideLinking()
and rb.IsPeptideLinking():
3373 c = ra.FindAtom(
"C")
3374 n = rb.FindAtom(
"N")
3375 if c.IsValid()
and n.IsValid()
and not mol.BondExists(c, n):
3377 ed = ent.EditXCS(mol.BUFFERED_EDIT)
3379 elif ra.IsNucleotideLinking()
and rb.IsNucleotideLinking():
3380 o = ra.FindAtom(
"O3'")
3381 p = rb.FindAtom(
"P")
3382 if o.IsValid()
and p.IsValid()
and not mol.BondExists(o, p):
3384 ed = ent.EditXCS(mol.BUFFERED_EDIT)
3389 __all__ = (
'lDDTBSScorer',
'Scorer',)
def target_interface_residues(self)
_per_interface_ips_trimmed
def _patchqs(self, mdl_patch_one, mdl_patch_two, trg_patch_one, trg_patch_two)
_per_interface_ips_recall
def per_interface_ics(self)
def target_bad_angles(self)
def _get_repr_view(self, ent)
def qs_target_interfaces(self)
def rigid_mapped_model_pos(self)
def _do_stereochecks(self)
def _compute_per_interface_qs_scores(self)
def model_bad_angles(self)
_rigid_mapped_target_pos_full_bb
_per_interface_ics_recall
_per_interface_ips_precision
def model_interface_residues(self)
_contact_target_interfaces
def stereochecked_target(self)
def per_interface_ips_precision(self)
_mapped_model_pos_full_bb
def dockq_wave_full(self)
def mapped_model_pos(self)
def rigid_mapped_target_pos_full_bb(self)
_per_interface_ics_precision_trimmed
def _compute_dockq_scores(self)
def _aln_helper(self, target, model)
def model_bad_bonds(self)
def per_interface_ics_recall(self)
def ics_precision_trimmed(self)
def _compute_bb_lddt(self)
def transformed_mapped_model_pos(self)
_per_interface_ips_recall_trimmed
_per_interface_ics_recall_trimmed
def usalign_mapping(self)
def native_contacts(self)
def local_cad_score(self)
def _compute_patchqs_scores(self)
def per_interface_ips(self)
def _compute_stereochecked_aln(self)
_rigid_transformed_mapped_model_pos
def rigid_transformed_mapped_model_pos(self)
def trimmed_model_contacts(self)
def _compute_ips_scores_trimmed(self)
def n_target_not_mapped(self)
def per_interface_ips_trimmed(self)
def contact_model_interfaces(self)
_transformed_mapped_model_pos
def _patchdockq(self, mdl_patch_one, mdl_patch_two, trg_patch_one, trg_patch_two)
_rigid_mapped_model_pos_full_bb
def dockq_interfaces(self)
def per_interface_ips_recall(self)
def per_interface_ics_precision(self)
def _compute_ics_scores(self)
def _resnum_connect(self, ent)
_rigid_n_target_not_mapped
def _get_interface_residues(self, ent)
def ips_precision_trimmed(self)
def __init__(self, model, target, resnum_alignments=False, molck_settings=None, cad_score_exec=None, custom_mapping=None, custom_rigid_mapping=None, usalign_exec=None, lddt_no_stereochecks=False, n_max_naive=40320, oum=False, min_pep_length=6, min_nuc_length=4, lddt_add_mdl_contacts=False, dockq_capri_peptide=False, lddt_symmetry_settings=None, lddt_inclusion_radius=15.0, pep_seqid_thr=95., nuc_seqid_thr=95., mdl_map_pep_seqid_thr=0., mdl_map_nuc_seqid_thr=0., seqres=None, trg_seqres_mapping=None)
def rigid_transform(self)
_mapped_target_pos_full_bb
def ics_recall_trimmed(self)
def mapped_target_pos_full_bb(self)
def _compute_ips_scores(self)
def _extract_mapped_pos(self)
def rigid_mapped_target_pos(self)
def _compute_tmscore(self)
def dockq_target_interfaces(self)
def stereochecked_aln(self)
def trimmed_contact_scorer(self)
def per_interface_ics_recall_trimmed(self)
_per_interface_ics_precision
def _compute_patchdockq_scores(self)
def _get_interface_patches(self, mdl_ch, mdl_rnum)
def _extract_rigid_mapped_pos(self)
_contact_model_interfaces
_target_interface_residues
def contact_target_interfaces(self)
def per_interface_qs_global(self)
def _qs_ent_from_patches(self, patch_one, patch_two)
def rigid_mapped_model_pos_full_bb(self)
def stereochecked_model(self)
_model_interface_residues
def _compute_pepnuc_aln(self)
def per_interface_qs_best(self)
def _extract_rigid_mapped_pos_full_bb(self)
def per_interface_ips_precision_trimmed(self)
def _compute_ics_scores_trimmed(self)
_per_interface_ips_precision_trimmed
def rigid_n_target_not_mapped(self)
def per_interface_ics_precision_trimmed(self)
def per_interface_ips_recall_trimmed(self)
def mapped_target_pos(self)
def mapped_model_pos_full_bb(self)
def target_bad_bonds(self)
def qs_model_interfaces(self)
def _compute_cad_score(self)
def ips_recall_trimmed(self)
def per_interface_ics_trimmed(self)
_per_interface_ics_trimmed
def _construct_custom_mapping(self, mapping)
def _extract_mapped_pos_full_bb(self)
def ScoreBS(self, ligand, radius=4.0, lddt_radius=10.0)
def __init__(self, reference, model, residue_number_alignment=False)
Real DLLEXPORT_OST_GEOM Distance(const Line2 &l, const Vec2 &v)
void Molck(ost::mol::EntityHandle &ent, ost::conop::CompoundLibPtr lib, const MolckSettings &settings, bool prune=true)
void GDT(const geom::Vec3List &mdl_pos, const geom::Vec3List &ref_pos, int window_size, int max_windows, Real distance_thresh, int &n_superposed, geom::Mat4 &transform)