OpenStructure
Loading...
Searching...
No Matches
qsscoring.py
Go to the documentation of this file.
1"""
2Scoring of quaternary structures (QS). The QS scoring is according to the paper
3by `Bertoni et al. <https://dx.doi.org/10.1038/s41598-017-09654-8>`_.
4
5.. warning::
6
7 The `qsscoring` module is deprecated. Consider using the newer implementation
8 in :mod:`~ost.mol.alg.qsscore` instead.
9
10.. note ::
11
12 Requirements for use:
13
14 - A default :class:`compound library <ost.conop.CompoundLib>` must be defined
15 and accessible via :func:`~ost.conop.GetDefaultLib`. This is set by default
16 when executing scripts with ``ost``. Otherwise, you must set this with
17 :func:`~ost.conop.SetDefaultLib`.
18 - ClustalW must be installed (unless you provide chain mappings)
19 - Python modules `numpy` and `scipy` must be installed and available
20 (e.g. use ``pip install scipy numpy``)
21"""
22# Original authors: Gerardo Tauriello, Martino Bertoni
23
24from ost import mol, geom, conop, seq, settings, PushVerbosityLevel
25from ost import LogError, LogWarning, LogScript, LogInfo, LogVerbose, LogDebug
26from ost.bindings.clustalw import ClustalW
27from ost.mol.alg import lDDTScorer
28from ost.seq.alg.renumber import Renumber
29import numpy as np
30from scipy.special import factorial
31from scipy.special import binom
32from scipy.cluster.hierarchy import fclusterdata
33import itertools
34
35
38
39class QSscoreError(Exception):
40 """Exception to be raised for "acceptable" exceptions in QS scoring.
41
42 Those are cases we might want to capture for default behavior.
43 """
44 pass
45
47 """Object to compute QS scores.
48
49 Simple usage without any precomputed contacts, symmetries and mappings:
50
51 .. code-block:: python
52
53 import ost
54 from ost.mol.alg import qsscoring
55
56 # load two biounits to compare
57 ent_full = ost.io.LoadPDB('3ia3', remote=True)
58 ent_1 = ent_full.Select('cname=A,D')
59 ent_2 = ent_full.Select('cname=B,C')
60 # get score
61 ost.PushVerbosityLevel(3)
62 try:
63 qs_scorer = qsscoring.QSscorer(ent_1, ent_2)
64 ost.LogScript('QSscore:', str(qs_scorer.global_score))
65 ost.LogScript('Chain mapping used:', str(qs_scorer.chain_mapping))
66 # commonly you want the QS global score as output
67 qs_score = qs_scorer.global_score
68 except qsscoring.QSscoreError as ex:
69 # default handling: report failure and set score to 0
70 ost.LogError('QSscore failed:', str(ex))
71 qs_score = 0
72
73 For maximal performance when computing QS scores of the same entity with many
74 others, it is advisable to construct and reuse :class:`QSscoreEntity` objects.
75
76 Any known / precomputed information can be filled into the appropriate
77 attribute here (no checks done!). Otherwise most quantities are computed on
78 first access and cached (lazy evaluation). Setters are provided to set values
79 with extra checks (e.g. :func:`SetSymmetries`).
80
81 All necessary seq. alignments are done by global BLOSUM62-based alignment. A
82 multiple sequence alignment is performed with ClustalW unless
83 :attr:`chain_mapping` is provided manually. You will need to have an
84 executable ``clustalw`` or ``clustalw2`` in your ``PATH`` or you must set
85 :attr:`clustalw_bin` accordingly. Otherwise an exception
86 (:class:`ost.settings.FileNotFound`) is thrown.
87
88 Formulas for QS scores:
89
90 ::
91
92 - QS_best = weighted_scores / (weight_sum + weight_extra_mapped)
93 - QS_global = weighted_scores / (weight_sum + weight_extra_all)
94 -> weighted_scores = sum(w(min(d1,d2)) * (1 - abs(d1-d2)/12)) for shared
95 -> weight_sum = sum(w(min(d1,d2))) for shared
96 -> weight_extra_mapped = sum(w(d)) for all mapped but non-shared
97 -> weight_extra_all = sum(w(d)) for all non-shared
98 -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else
99
100 In the formulas above:
101
102 * "d": CA/CB-CA/CB distance of an "inter-chain contact" ("d1", "d2" for
103 "shared" contacts).
104 * "mapped": we could map chains of two structures and align residues in
105 :attr:`alignments`.
106 * "shared": pairs of residues which are "mapped" and have
107 "inter-chain contact" in both structures.
108 * "inter-chain contact": CB-CB pairs (CA for GLY) with distance <= 12 A
109 (fallback to CA-CA if :attr:`calpha_only` is True).
110 * "w(d)": weighting function (prob. of 2 res. to interact given CB distance)
111 from `Xu et al. 2009 <https://dx.doi.org/10.1016%2Fj.jmb.2008.06.002>`_.
112
113 :param ent_1: First structure to be scored.
114 :type ent_1: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
115 :class:`~ost.mol.EntityView`
116 :param ent_2: Second structure to be scored.
117 :type ent_2: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
118 :class:`~ost.mol.EntityView`
119 :param res_num_alignment: Sets :attr:`res_num_alignment`
120
121 :raises: :class:`QSscoreError` if input structures are invalid or are monomers
122 or have issues that make it impossible for a QS score to be computed.
123
124 .. attribute:: qs_ent_1
125
126 :class:`QSscoreEntity` object for *ent_1* given at construction.
127 If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
128 set it to 'pdb_1' using :func:`~QSscoreEntity.SetName`.
129
130 .. attribute:: qs_ent_2
131
132 :class:`QSscoreEntity` object for *ent_2* given at construction.
133 If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
134 set it to 'pdb_2' using :func:`~QSscoreEntity.SetName`.
135
136 .. attribute:: calpha_only
137
138 True if any of the two structures is CA-only (after cleanup).
139
140 :type: :class:`bool`
141
142 .. attribute:: max_ca_per_chain_for_cm
143
144 Maximal number of CA atoms to use in each chain to determine chain mappings.
145 Setting this to -1 disables the limit. Limiting it speeds up determination
146 of symmetries and chain mappings. By default it is set to 100.
147
148 :type: :class:`int`
149
150 .. attribute:: max_mappings_extensive
151
152 Maximal number of chain mappings to test for 'extensive'
153 :attr:`chain_mapping_scheme`. The extensive chain mapping search must in the
154 worst case check O(N^2) * O(N!) possible mappings for complexes with N
155 chains. Two octamers without symmetry would require 322560 mappings to be
156 checked. To limit computations, a :class:`QSscoreError` is thrown if we try
157 more than the maximal number of chain mappings.
158 The value must be set before the first use of :attr:`chain_mapping`.
159 By default it is set to 100000.
160
161 :type: :class:`int`
162
163 .. attribute:: res_num_alignment
164
165 Forces each alignment in :attr:`alignments` to be based on residue numbers
166 instead of using a global BLOSUM62-based alignment.
167
168 :type: :class:`bool`
169 """
170 def __init__(self, ent_1, ent_2, res_num_alignment=False):
171 # generate QSscoreEntity objects?
172 if isinstance(ent_1, QSscoreEntity):
173 self.qs_ent_1 = ent_1
174 else:
175 self.qs_ent_1 = QSscoreEntity(ent_1)
176 if isinstance(ent_2, QSscoreEntity):
177 self.qs_ent_2 = ent_2
178 else:
179 self.qs_ent_2 = QSscoreEntity(ent_2)
180 # check validity of inputs
181 if not self.qs_ent_1.is_valid or not self.qs_ent_2.is_valid:
182 raise QSscoreError("Invalid input in QSscorer!")
183 # set names for structures
184 if self.qs_ent_1.original_name == self.qs_ent_2.original_name:
185 self.qs_ent_1.SetName('pdb_1')
186 self.qs_ent_2.SetName('pdb_2')
187 else:
188 self.qs_ent_1.SetName(self.qs_ent_1.original_name)
189 self.qs_ent_2.SetName(self.qs_ent_2.original_name)
190 # set other public attributes
191 self.res_num_alignment = res_num_alignment
192 self.calpha_only = self.qs_ent_1.calpha_only or self.qs_ent_2.calpha_only
195 # init cached stuff
196 self._chem_mapping = None
197 self._ent_to_cm_1 = None
198 self._ent_to_cm_2 = None
199 self._symm_1 = None
200 self._symm_2 = None
201 self._chain_mapping = None
203 self._alignments = None
205 self._global_score = None
206 self._best_score = None
207 self._superposition = None
208 self._clustalw_bin = None
209
210 @property
211 def chem_mapping(self):
212 """Inter-complex mapping of chemical groups.
213
214 Each group (see :attr:`QSscoreEntity.chem_groups`) is mapped according to
215 highest sequence identity. Alignment is between longest sequences in groups.
216
217 Limitations:
218
219 - If different numbers of groups, we map only the groups for the complex
220 with less groups (rest considered unmapped and shown as warning)
221 - The mapping is forced: the "best" mapping will be chosen independently of
222 how low the seq. identity may be
223
224 :getter: Computed on first use (cached)
225 :type: :class:`dict` with key = :class:`tuple` of chain names in
226 :attr:`qs_ent_1` and value = :class:`tuple` of chain names in
227 :attr:`qs_ent_2`.
228
229 :raises: :class:`QSscoreError` if we end up having no chains for either
230 entity in the mapping (can happen if chains do not have CA atoms).
231 """
232 if self._chem_mapping is None:
234 return self._chem_mapping
235
236 @chem_mapping.setter
237 def chem_mapping(self, chem_mapping):
238 self._chem_mapping = chem_mapping
239
240 @property
241 def ent_to_cm_1(self):
242 """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries.
243
244 Properties:
245
246 - Includes only residues aligned according to :attr:`chem_mapping`
247 - Includes only 1 CA atom per residue
248 - Has at least 5 and at most :attr:`max_ca_per_chain_for_cm` atoms per chain
249 - All chains of the same chemical group have the same number of atoms
250 (also in :attr:`ent_to_cm_2` according to :attr:`chem_mapping`)
251 - All chains appearing in :attr:`chem_mapping` appear in this entity
252 (so the two can be safely used together)
253
254 This entity might be transformed (i.e. all positions rotated/translated by
255 same transformation matrix) if this can speed up computations. So do not
256 assume fixed global positions (but relative distances will remain fixed).
257
258 :getter: Computed on first use (cached)
259 :type: :class:`~ost.mol.EntityHandle`
260
261 :raises: :class:`QSscoreError` if any chain ends up having less than 5 res.
262 """
263 if self._ent_to_cm_1 is None:
265 return self._ent_to_cm_1
266
267 @ent_to_cm_1.setter
268 def ent_to_cm_1(self, ent_to_cm_1):
269 self._ent_to_cm_1 = ent_to_cm_1
270
271 @property
272 def ent_to_cm_2(self):
273 """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries
274 (see :attr:`ent_to_cm_1` for details).
275 """
276 if self._ent_to_cm_2 is None:
278 return self._ent_to_cm_2
279
280 @ent_to_cm_2.setter
281 def ent_to_cm_2(self, ent_to_cm_2):
282 self._ent_to_cm_2 = ent_to_cm_2
283
284 @property
285 def symm_1(self):
286 """Symmetry groups for :attr:`qs_ent_1` used to speed up chain mapping.
287
288 This is a list of chain-lists where each chain-list can be used reconstruct
289 the others via cyclic C or dihedral D symmetry. The first chain-list is used
290 as a representative symmetry group. For heteromers, the group-members must
291 contain all different seqres in oligomer.
292
293 Example: symm. groups [(A,B,C), (D,E,F), (G,H,I)] means that there are
294 symmetry transformations to get (D,E,F) and (G,H,I) from (A,B,C).
295
296 Properties:
297
298 - All symmetry group tuples have the same length (num. of chains)
299 - All chains in :attr:`ent_to_cm_1` appear (w/o duplicates)
300 - For heteros: symmetry group tuples have all different chem. groups
301 - Trivial symmetry group = one tuple with all chains (used if inconsistent
302 data provided or if no symmetry is found)
303 - Either compatible to :attr:`symm_2` or trivial symmetry groups used.
304 Compatibility requires same lengths of symmetry group tuples and it must
305 be possible to get an overlap (80% of residues covered within 6 A of a
306 (chem. mapped) chain) of all chains in representative symmetry groups by
307 superposing one pair of chains.
308
309 :getter: Computed on first use (cached)
310 :type: :class:`list` of :class:`tuple` of :class:`str` (chain names)
311 """
312 if self._symm_1 is None:
313 self._ComputeSymmetry()
314 return self._symm_1
315
316 @property
317 def symm_2(self):
318 """Symmetry groups for :attr:`qs_ent_2` (see :attr:`symm_1` for details)."""
319 if self._symm_2 is None:
320 self._ComputeSymmetry()
321 return self._symm_2
322
323 def SetSymmetries(self, symm_1, symm_2):
324 """Set user-provided symmetry groups.
325
326 These groups are restricted to chain names appearing in :attr:`ent_to_cm_1`
327 and :attr:`ent_to_cm_2` respectively. They are only valid if they cover all
328 chains and both *symm_1* and *symm_2* have same lengths of symmetry group
329 tuples. Otherwise trivial symmetry group used (see :attr:`symm_1`).
330
331 :param symm_1: Value to set for :attr:`symm_1`.
332 :param symm_2: Value to set for :attr:`symm_2`.
333 """
334 # restrict chain names
337 # check that we have reasonable symmetries set (fallback: all chains)
338 if not _AreValidSymmetries(self._symm_1, self._symm_2):
339 self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1ent_to_cm_1ent_to_cm_1.chains)]
340 self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2ent_to_cm_2ent_to_cm_2.chains)]
341
342 @property
343 def chain_mapping(self):
344 """Mapping from :attr:`ent_to_cm_1` to :attr:`ent_to_cm_2`.
345
346 Properties:
347
348 - Mapping is between chains of same chem. group (see :attr:`chem_mapping`)
349 - Each chain can appear only once in mapping
350 - All chains of complex with less chains are mapped
351 - Symmetry (:attr:`symm_1`, :attr:`symm_2`) is taken into account
352
353 Details on algorithms used to find mapping:
354
355 - We try all pairs of chem. mapped chains within symmetry group and get
356 superpose-transformation for them
357 - First option: check for "sufficient overlap" of other chain-pairs
358
359 - For each chain-pair defined above: apply superposition to full oligomer
360 and map chains based on structural overlap
361 - Structural overlap = X% of residues in second oligomer covered within Y
362 Angstrom of a (chem. mapped) chain in first oligomer. We successively
363 try (X,Y) = (80,4), (40,6) and (20,8) to be less and less strict in
364 mapping (warning shown for most permissive one).
365 - If multiple possible mappings are found, we choose the one which leads
366 to the lowest multi-chain-RMSD given the superposition
367
368 - Fallback option: try all mappings to find minimal multi-chain-RMSD
369 (warning shown)
370
371 - For each chain-pair defined above: apply superposition, try all (!)
372 possible chain mappings (within symmetry group) and keep mapping with
373 lowest multi-chain-RMSD
374 - Repeat procedure above to resolve symmetry. Within the symmetry group we
375 can use the chain mapping computed before and we just need to find which
376 symmetry group in first oligomer maps to which in the second one. We
377 again try all possible combinations...
378 - Limitations:
379
380 - Trying all possible mappings is a combinatorial nightmare (factorial).
381 We throw an exception if too many combinations (e.g. octomer vs
382 octomer with no usable symmetry)
383 - The mapping is forced: the "best" mapping will be chosen independently
384 of how badly they fit in terms of multi-chain-RMSD
385 - As a result, such a forced mapping can lead to a large range of
386 resulting QS scores. An extreme example was observed between 1on3.1
387 and 3u9r.1, where :attr:`global_score` can range from 0.12 to 0.43
388 for mappings with very similar multi-chain-RMSD.
389
390 :getter: Computed on first use (cached)
391 :type: :class:`dict` with key / value = :class:`str` (chain names, key
392 for :attr:`ent_to_cm_1`, value for :attr:`ent_to_cm_2`)
393 :raises: :class:`QSscoreError` if there are too many combinations to check
394 to find a chain mapping (see :attr:`max_mappings_extensive`).
395 """
396 if self._chain_mapping is None:
401 LogInfo('Mapping found: %s' % str(self._chain_mapping))
402 return self._chain_mapping
403
404 @chain_mapping.setter
405 def chain_mapping(self, chain_mapping):
406 self._chain_mapping = chain_mapping
407
408 @property
410 """Mapping scheme used to get :attr:`chain_mapping`.
411
412 Possible values:
413
414 - 'strict': 80% overlap needed within 4 Angstrom (overlap based mapping).
415 - 'tolerant': 40% overlap needed within 6 Angstrom (overlap based mapping).
416 - 'permissive': 20% overlap needed within 8 Angstrom (overlap based
417 mapping). It's best if you check mapping manually!
418 - 'extensive': Extensive search used for mapping detection (fallback). This
419 approach has known limitations and may be removed in future versions.
420 Mapping should be checked manually!
421 - 'user': :attr:`chain_mapping` was set by user before first use of this
422 attribute.
423
424 :getter: Computed with :attr:`chain_mapping` on first use (cached)
425 :type: :class:`str`
426 :raises: :class:`QSscoreError` as in :attr:`chain_mapping`.
427 """
428 if self._chain_mapping_scheme is None:
429 # default: user provided
430 self._chain_mapping_scheme = 'user'
431 # get chain mapping and make sure internal variable is set
432 # -> will not compute and only update _chain_mapping if user provided
433 # -> will compute and overwrite _chain_mapping_scheme else
435 return self._chain_mapping_scheme
436
437 @property
438 def alignments(self):
439 """List of successful sequence alignments using :attr:`chain_mapping`.
440
441 There will be one alignment for each mapped chain and they are ordered by
442 their chain names in :attr:`qs_ent_1`.
443
444 The first sequence of each alignment belongs to :attr:`qs_ent_1` and the
445 second one to :attr:`qs_ent_2`. The sequences are named according to the
446 mapped chain names and have views attached into :attr:`QSscoreEntity.ent`
447 of :attr:`qs_ent_1` and :attr:`qs_ent_2`.
448
449 If :attr:`res_num_alignment` is False, each alignment is performed using a
450 global BLOSUM62-based alignment. Otherwise, the positions in the alignment
451 sequences are simply given by the residue number so that residues with
452 matching numbers are aligned.
453
454 :getter: Computed on first use (cached)
455 :type: :class:`list` of :class:`~ost.seq.AlignmentHandle`
456 """
457 if self._alignments is None:
459 self.qs_ent_2.ent,
462 return self._alignments
463
464 @alignments.setter
465 def alignments(self, alignments):
466 self._alignments = alignments
467
468 @property
470 """Mapping of shared residues in :attr:`alignments`.
471
472 :getter: Computed on first use (cached)
473 :type: :class:`dict` *mapped_residues[c1][r1] = r2* with:
474 *c1* = Chain name in first entity (= first sequence in aln),
475 *r1* = Residue number in first entity,
476 *r2* = Residue number in second entity
477 """
478 if self._mapped_residues is None:
480 return self._mapped_residues
481
482 @mapped_residues.setter
483 def mapped_residues(self, mapped_residues):
484 self._mapped_residues = mapped_residues
485
486 @property
487 def global_score(self):
488 """QS-score with penalties.
489
490 The range of the score is between 0 (i.e. no interface residues are shared
491 between biounits) and 1 (i.e. the interfaces are identical).
492
493 The global QS-score is computed applying penalties when interface residues
494 or entire chains are missing (i.e. anything that is not mapped in
495 :attr:`mapped_residues` / :attr:`chain_mapping`) in one of the biounits.
496
497 :getter: Computed on first use (cached)
498 :type: :class:`float`
499 :raises: :class:`QSscoreError` if only one chain is mapped
500 """
501 if self._global_score is None:
502 self._ComputeScores()
503 return self._global_score
504
505 @property
506 def best_score(self):
507 """QS-score without penalties.
508
509 Like :attr:`global_score`, but neglecting additional residues or chains in
510 one of the biounits (i.e. the score is calculated considering only mapped
511 chains and residues).
512
513 :getter: Computed on first use (cached)
514 :type: :class:`float`
515 :raises: :class:`QSscoreError` if only one chain is mapped
516 """
517 if self._best_score is None:
518 self._ComputeScores()
519 return self._best_score
520
521 @property
522 def superposition(self):
523 """Superposition result based on shared CA atoms in :attr:`alignments`.
524
525 The superposition can be used to map :attr:`QSscoreEntity.ent` of
526 :attr:`qs_ent_1` onto the one of :attr:`qs_ent_2`. Use
527 :func:`ost.geom.Invert` if you need the opposite transformation.
528
529 :getter: Computed on first use (cached)
530 :type: :class:`ost.mol.alg.SuperpositionResult`
531 """
532 if self._superposition is None:
534 # report it
535 sup_rmsd = self._superposition.rmsd
536 cmp_view = self._superposition.view1
537 LogInfo('CA RMSD for %s aligned residues on %s chains: %.2f' \
538 % (cmp_view.residue_count, cmp_view.chain_count, sup_rmsd))
539 return self._superposition
540
541 @property
542 def clustalw_bin(self):
543 """
544 Full path to ``clustalw`` or ``clustalw2`` executable to use for multiple
545 sequence alignments (unless :attr:`chain_mapping` is provided manually).
546
547 :getter: Located in path on first use (cached)
548 :type: :class:`str`
549 """
550 if self._clustalw_bin is None:
551 self._clustalw_bin = settings.Locate(('clustalw', 'clustalw2'))
552 return self._clustalw_bin
553
554 @clustalw_bin.setter
555 def clustalw_bin(self, clustalw_bin):
556 self._clustalw_bin = clustalw_bin
557
558 def GetOligoLDDTScorer(self, settings, penalize_extra_chains=True):
559 """
560 :return: :class:`OligoLDDTScorer` object, setup for this QS scoring problem.
561 The scorer is set up with :attr:`qs_ent_1` as the reference and
562 :attr:`qs_ent_2` as the model.
563 :param settings: Passed to :class:`OligoLDDTScorer` constructor.
564 :param penalize_extra_chains: Passed to :class:`OligoLDDTScorer` constructor.
565 """
566 if penalize_extra_chains:
567 return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent,
570 else:
571 return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent,
572 self.alignmentsalignmentsalignments, self.calpha_only, settings, False)
573
574
575
578
580 """Fills cached ent_to_cm_1 and ent_to_cm_2."""
581 # get aligned residues via MSA
582 ev1, ev2 = _GetAlignedResidues(self.qs_ent_1, self.qs_ent_2,
586 # extract new entities
587 self._ent_to_cm_1 = mol.CreateEntityFromView(ev1, True)
588 self._ent_to_cm_2 = mol.CreateEntityFromView(ev2, True)
589 # name them
590 self._ent_to_cm_1.SetName(self.qs_ent_1.GetName())
591 self._ent_to_cm_2.SetName(self.qs_ent_2.GetName())
592
594 """Fills cached symm_1 and symm_2."""
595 # get them
596 self._symm_1, self._symm_2 = \
599 # check that we have reasonable symmetries set (fallback: all chains)
600 if not _AreValidSymmetries(self._symm_1, self._symm_2):
601 self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1ent_to_cm_1ent_to_cm_1.chains)]
602 self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2ent_to_cm_2ent_to_cm_2.chains)]
603
604 def _ComputeScores(self):
605 """Fills cached global_score and best_score."""
607 raise QSscoreError("QS-score is not defined for monomers")
608 # get contacts
609 if self.calpha_only:
610 contacts_1 = self.qs_ent_1.contacts_ca
611 contacts_2 = self.qs_ent_2.contacts_ca
612 else:
613 contacts_1 = self.qs_ent_1.contacts
614 contacts_2 = self.qs_ent_2.contacts
615 # get scores
616 scores = _GetScores(contacts_1, contacts_2, self.mapped_residuesmapped_residues,
618 self._best_score = scores[0]
619 self._global_score = scores[1]
620 # report scores
621 LogInfo('QSscore %s, %s: best: %.2f, global: %.2f' \
622 % (self.qs_ent_1.GetName(), self.qs_ent_2.GetName(),
623 self._best_score, self._global_score))
624
625
626
629
630class QSscoreEntity(object):
631 """Entity with cached entries for QS scoring.
632
633 Any known / precomputed information can be filled into the appropriate
634 attribute here as long as they are labelled as read/write. Otherwise the
635 quantities are computed on first access and cached (lazy evaluation). The
636 heaviest load is expected when computing :attr:`contacts` and
637 :attr:`contacts_ca`.
638
639 :param ent: Entity to be used for QS scoring. A copy of it will be processed.
640 :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
641
642 .. attribute:: is_valid
643
644 True, if successfully initialized. False, if input structure has no protein
645 chains with >= 20 residues.
646
647 :type: :class:`bool`
648
649 .. attribute:: original_name
650
651 Name set for *ent* when object was created.
652
653 :type: :class:`str`
654
655 .. attribute:: ent
656
657 Cleaned version of *ent* passed at construction. Hydrogens are removed, the
658 entity is processed with a :class:`~ost.conop.RuleBasedProcessor` and chains
659 listed in :attr:`removed_chains` have been removed. The name of this entity
660 might change during scoring (see :func:`GetName`). Otherwise, this will be
661 fixed.
662
663 :type: :class:`~ost.mol.EntityHandle`
664
665 .. attribute:: removed_chains
666
667 Chains removed from *ent* passed at construction. These are ligand and water
668 chains as well as small (< 20 res.) peptides or chains with no amino acids
669 (determined by chem. type, which is set by rule based processor).
670
671 :type: :class:`list` of :class:`str`
672
673 .. attribute:: calpha_only
674
675 Whether entity is CA-only (i.e. it has 0 CB atoms)
676
677 :type: :class:`bool`
678 """
679 def __init__(self, ent):
680 # copy entity and process/clean it
681 self.original_name = ent.GetName()
682 ent = mol.CreateEntityFromView(ent.Select('ele!=H and aname!=HN'), True)
683 if not conop.GetDefaultLib():
684 raise RuntimeError("QSscore computation requires a compound library!")
685 pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
686 pr.Process(ent)
688 # check if it's suitable for QS scoring
689 if self.ent.chain_count == 0:
690 LogError('Bad input file: ' + ent.GetName() + '. No chains left after '
691 'removing water, ligands and small peptides.')
692 self.is_valid = False
693 elif self.ent.chain_count == 1:
694 LogWarning('Structure ' + ent.GetName() + ' is a monomer.')
695 self.is_valid = True
696 else:
697 self.is_valid = True
698 # init cached stuff
699 self._ca_entity = None
700 self._ca_chains = None
701 self._alignments = {}
702 self._chem_groups = None
703 self._angles = {}
704 self._axis = {}
705 self._contacts = None
706 self._contacts_ca = None
707
708 def GetName(self):
709 """Wrapper to :func:`~ost.mol.EntityHandle.GetName` of :attr:`ent`.
710 This is used to uniquely identify the entity while scoring. The name may
711 therefore change while :attr:`original_name` remains fixed.
712 """
713 # for duck-typing and convenience
714 return self.ent.GetName()
715
716 def SetName(self, new_name):
717 """Wrapper to :func:`~ost.mol.EntityHandle.SetName` of :attr:`ent`.
718 Use this to change unique identifier while scoring (see :func:`GetName`).
719 """
720 # for duck-typing and convenience
721 self.ent.SetName(new_name)
722
723 @property
724 def ca_entity(self):
725 """
726 Reduced representation of :attr:`ent` with only CA atoms.
727 This guarantees that each included residue has exactly one atom.
728
729 :getter: Computed on first use (cached)
730 :type: :class:`~ost.mol.EntityHandle`
731 """
732 if self._ca_entity is None:
733 self._ca_entity = _GetCAOnlyEntity(self.ent)
734 return self._ca_entity
735
736 @property
737 def ca_chains(self):
738 """
739 Map of chain names in :attr:`ent` to sequences with attached view to CA-only
740 chains (into :attr:`ca_entity`). Useful for alignments and superpositions.
741
742 :getter: Computed on first use (cached)
743 :type: :class:`dict` (key = :class:`str`,
744 value = :class:`~ost.seq.SequenceHandle`)
745 """
746 if self._ca_chains is None:
747 self._ca_chains = dict()
748 ca_entity = self.ca_entity
749 for ch in ca_entity.chains:
750 self._ca_chains[ch.name] = seq.SequenceFromChain(ch.name, ch)
751 return self._ca_chains
752
753 def GetAlignment(self, c1, c2):
754 """Get sequence alignment of chain *c1* with chain *c2*.
755 Computed on first use based on :attr:`ca_chains` (cached).
756
757 :param c1: Chain name for first chain to align.
758 :type c1: :class:`str`
759 :param c2: Chain name for second chain to align.
760 :type c2: :class:`str`
761 :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
762 """
763 if (c1,c2) not in self._alignments:
764 ca_chains = self.ca_chains
765 self._alignments[(c1,c2)] = _AlignAtomSeqs(ca_chains[c1], ca_chains[c2])
766 return self._alignments[(c1,c2)]
767
768 @property
769 def chem_groups(self):
770 """
771 Intra-complex group of chemically identical (seq. id. > 95%) polypeptide
772 chains as extracted from :attr:`ca_chains`. First chain in group is the one
773 with the longest sequence.
774
775 :getter: Computed on first use (cached)
776 :type: :class:`list` of :class:`list` of :class:`str` (chain names)
777 """
778 if self._chem_groups is None:
779 self._chem_groups = _GetChemGroups(self, 95)
780 LogInfo('Chemically equivalent chain-groups in %s: %s' \
781 % (self.GetName(), str(self._chem_groups)))
782 return self._chem_groups
783
784 def GetAngles(self, c1, c2):
785 """Get Euler angles from superposition of chain *c1* with chain *c2*.
786 Computed on first use based on :attr:`ca_chains` (cached).
787
788 :param c1: Chain name for first chain to superpose.
789 :type c1: :class:`str`
790 :param c2: Chain name for second chain to superpose.
791 :type c2: :class:`str`
792 :return: 3 Euler angles (may contain nan if something fails).
793 :rtype: :class:`numpy.array`
794 """
795 if (c1,c2) not in self._angles:
796 self._GetSuperposeData(c1, c2)
797 return self._angles[(c1,c2)]
798
799 def GetAxis(self, c1, c2):
800 """Get axis of symmetry from superposition of chain *c1* with chain *c2*.
801 Computed on first use based on :attr:`ca_chains` (cached).
802
803 :param c1: Chain name for first chain to superpose.
804 :type c1: :class:`str`
805 :param c2: Chain name for second chain to superpose.
806 :type c2: :class:`str`
807 :return: Rotational axis (may contain nan if something fails).
808 :rtype: :class:`numpy.array`
809 """
810 if (c1,c2) not in self._axis:
811 self._GetSuperposeData(c1, c2)
812 return self._axis[(c1,c2)]
813
814 @property
815 def contacts(self):
816 """
817 Connectivity dictionary (**read/write**).
818 As given by :func:`GetContacts` with *calpha_only* = False on :attr:`ent`.
819
820 :getter: Computed on first use (cached)
821 :setter: Uses :func:`FilterContacts` to ensure that we only keep contacts
822 for chains in the cleaned entity.
823 :type: See return type of :func:`GetContacts`
824 """
825 if self._contacts is None:
826 self._contacts = GetContacts(self.ent, False)
827 return self._contacts
828
829 @contacts.setter
830 def contacts(self, new_contacts):
831 chain_names = set([ch.name for ch in self.ent.chains])
832 self._contacts = FilterContacts(new_contacts, chain_names)
833
834 @property
835 def contacts_ca(self):
836 """
837 CA-only connectivity dictionary (**read/write**).
838 Like :attr:`contacts` but with *calpha_only* = True in :func:`GetContacts`.
839 """
840 if self._contacts_ca is None:
841 self._contacts_ca = GetContacts(self.ent, True)
842 return self._contacts_ca
843
844 @contacts_ca.setter
845 def contacts_ca(self, new_contacts):
846 chain_names = set([ch.name for ch in self.ent.chains])
847 self._contacts_ca = FilterContacts(new_contacts, chain_names)
848
849
852
853 def _GetSuperposeData(self, c1, c2):
854 """Fill _angles and _axis from superposition of CA chains of c1 and c2."""
855 # get aligned views (must contain identical numbers of atoms!)
856 aln = self.GetAlignment(c1, c2)
857 if not aln:
858 # fallback for non-aligned stuff (nan)
859 self._angles[(c1,c2)] = np.empty(3) * np.nan
860 self._axis[(c1,c2)] = np.empty(3) * np.nan
861 return
862 v1, v2 = seq.ViewsFromAlignment(aln)
863 if v1.atom_count < 3:
864 # fallback for non-aligned stuff (nan)
865 self._angles[(c1,c2)] = np.empty(3) * np.nan
866 self._axis[(c1,c2)] = np.empty(3) * np.nan
867 return
868 # get transformation
869 sup_res = mol.alg.SuperposeSVD(v1, v2, apply_transform=False)
870 Rt = sup_res.transformation
871 # extract angles
872 a,b,c = _GetAngles(Rt)
873 self._angles[(c1,c2)] = np.asarray([a,b,c])
874 # extract axis of symmetry
875 R3 = geom.Rotation3(Rt.ExtractRotation())
876 self._axis[(c1,c2)] = np.asarray(R3.GetRotationAxis().data)
877
878
881
882def FilterContacts(contacts, chain_names):
883 """Filter contacts to contain only contacts for chains in *chain_names*.
884
885 :param contacts: Connectivity dictionary as produced by :func:`GetContacts`.
886 :type contacts: :class:`dict`
887 :param chain_names: Chain names to keep.
888 :type chain_names: :class:`list` or (better) :class:`set`
889 :return: New connectivity dictionary (format as in :func:`GetContacts`)
890 :rtype: :class:`dict`
891 """
892 # create new dict with subset
893 filtered_contacts = dict()
894 for c1 in contacts:
895 if c1 in chain_names:
896 new_contacts = dict()
897 for c2 in contacts[c1]:
898 if c2 in chain_names:
899 new_contacts[c2] = contacts[c1][c2]
900 # avoid adding empty dicts
901 if new_contacts:
902 filtered_contacts[c1] = new_contacts
903 return filtered_contacts
904
905def GetContacts(entity, calpha_only, dist_thr=12.0):
906 """Get inter-chain contacts of a macromolecular entity.
907
908 Contacts are pairs of residues within a given distance belonging to different
909 chains. They are stored once per pair and include the CA/CB-CA/CB distance.
910
911 :param entity: An entity to check connectivity for.
912 :type entity: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
913 :param calpha_only: If True, we only consider CA-CA distances. Else, we use CB
914 unless the residue is a GLY.
915 :type calpha_only: :class:`bool`
916 :param dist_thr: Maximal CA/CB-CA/CB distance to be considered in contact.
917 :type dist_thr: :class:`float`
918 :return: A connectivity dictionary. A pair of residues with chain names
919 *ch_name1* & *ch_name2* (*ch_name1* < *ch_name2*), residue numbers
920 *res_num1* & *res_num2* and distance *dist* (<= *dist_thr*) are
921 stored as *result[ch_name1][ch_name2][res_num1][res_num2]* = *dist*.
922 :rtype: :class:`dict`
923 """
924 # get ent copy to search on
925 if calpha_only:
926 ev = entity.Select("aname=CA")
927 else:
928 ev = entity.Select("(rname=GLY and aname=CA) or aname=CB")
929 ent = mol.CreateEntityFromView(ev, True)
930 # search all vs all
931 contacts = dict()
932 for atom in ent.atoms:
933 ch_name1 = atom.chain.name
934 res_num1 = atom.residue.number.num
935 close_atoms = ent.FindWithin(atom.pos, dist_thr)
936 for close_atom in close_atoms:
937 ch_name2 = close_atom.chain.name
938 if ch_name2 > ch_name1:
939 res_num2 = close_atom.residue.number.num
940 dist = geom.Distance(atom.pos, close_atom.pos)
941 # add to contacts
942 if ch_name1 not in contacts:
943 contacts[ch_name1] = dict()
944 if ch_name2 not in contacts[ch_name1]:
945 contacts[ch_name1][ch_name2] = dict()
946 if res_num1 not in contacts[ch_name1][ch_name2]:
947 contacts[ch_name1][ch_name2][res_num1] = dict()
948 contacts[ch_name1][ch_name2][res_num1][res_num2] = round(dist, 3)
949 # DONE
950 return contacts
951
952
955
956class OligoLDDTScorer(object):
957 """Helper class to calculate oligomeric lDDT scores.
958
959 This class can be used independently, but commonly it will be created by
960 calling :func:`QSscorer.GetOligoLDDTScorer`.
961
962 .. note::
963
964 By construction, lDDT scores are not symmetric and hence it matters which
965 structure is the reference (:attr:`ref`) and which one is the model
966 (:attr:`mdl`). Extra residues in the model are generally not considered.
967 Extra chains in both model and reference can be considered by setting the
968 :attr:`penalize_extra_chains` flag to True.
969
970 :param ref: Sets :attr:`ref`
971 :param mdl: Sets :attr:`mdl`
972 :param alignments: Sets :attr:`alignments`
973 :param calpha_only: Sets :attr:`calpha_only`
974 :param settings: Sets :attr:`settings`
975 :param penalize_extra_chains: Sets :attr:`penalize_extra_chains`
976 :param chem_mapping: Sets :attr:`chem_mapping`. Must be given if
977 *penalize_extra_chains* is True.
978
979 .. attribute:: ref
980 mdl
981
982 Full reference/model entity to be scored. The entity must contain all chains
983 mapped in :attr:`alignments` and may also contain additional ones which are
984 considered if :attr:`penalize_extra_chains` is True.
985
986 :type: :class:`~ost.mol.EntityHandle`
987
988 .. attribute:: alignments
989
990 One alignment for each mapped chain of :attr:`ref`/:attr:`mdl` as defined in
991 :attr:`QSscorer.alignments`. The first sequence of each alignment belongs to
992 :attr:`ref` and the second one to :attr:`mdl`. Sequences must have sequence
993 naming and attached views as defined in :attr:`QSscorer.alignments`.
994
995 :type: :class:`list` of :class:`~ost.seq.AlignmentHandle`
996
997 .. attribute:: calpha_only
998
999 If True, restricts lDDT score to CA only.
1000
1001 :type: :class:`bool`
1002
1003 .. attribute:: settings
1004
1005 Settings to use for lDDT scoring.
1006
1007 :type: :class:`~ost.mol.alg.lDDTSettings`
1008
1009 .. attribute:: penalize_extra_chains
1010
1011 If True, extra chains in both :attr:`ref` and :attr:`mdl` will penalize the
1012 lDDT scores.
1013
1014 :type: :class:`bool`
1015
1016 .. attribute:: chem_mapping
1017
1018 Inter-complex mapping of chemical groups as defined in
1019 :attr:`QSscorer.chem_mapping`. Used to find "chem-mapped" chains in
1020 :attr:`ref` for unmapped chains in :attr:`mdl` when penalizing scores.
1021 Each unmapped model chain can add extra reference-contacts according to the
1022 average total contacts of each single "chem-mapped" reference chain. If
1023 there is no "chem-mapped" reference chain, a warning is shown and the model
1024 chain is ignored.
1025
1026
1027 Only relevant if :attr:`penalize_extra_chains` is True.
1028
1029 :type: :class:`dict` with key = :class:`tuple` of chain names in
1030 :attr:`ref` and value = :class:`tuple` of chain names in
1031 :attr:`mdl`.
1032 """
1033
1034 # NOTE: one could also allow computation of both penalized and unpenalized
1035 # in same object -> must regenerate lddt_ref / lddt_mdl though
1036
1037 def __init__(self, ref, mdl, alignments, calpha_only, settings,
1038 penalize_extra_chains=False, chem_mapping=None):
1039 # sanity checks
1040 if chem_mapping is None and penalize_extra_chains:
1041 raise RuntimeError("Must provide chem_mapping when requesting penalty "
1042 "for extra chains!")
1043 if not penalize_extra_chains:
1044 # warn for unmapped model chains
1045 unmapped_mdl_chains = self._GetUnmappedMdlChains(mdl, alignments)
1046 if unmapped_mdl_chains:
1047 LogWarning('MODEL contains chains unmapped to REFERENCE, '
1048 'lDDT is not considering MODEL chains %s' \
1049 % str(list(unmapped_mdl_chains)))
1050 # warn for unmapped reference chains
1051 ref_chains = set(ch.name for ch in ref.chains)
1052 mapped_ref_chains = set(aln.GetSequence(0).GetName() for aln in alignments)
1053 unmapped_ref_chains = (ref_chains - mapped_ref_chains)
1054 if unmapped_ref_chains:
1055 LogWarning('REFERENCE contains chains unmapped to MODEL, '
1056 'lDDT is not considering REFERENCE chains %s' \
1057 % str(list(unmapped_ref_chains)))
1058 # prepare fields
1059 self.ref = ref
1060 self.mdl = mdl
1061 self.alignments = alignments
1062 self.calpha_only = calpha_only
1063 self.settings = settings
1064 self.penalize_extra_chains = penalize_extra_chains
1065 self.chem_mapping = chem_mapping
1066 self._sc_lddt = None
1067 self._oligo_lddt = None
1068 self._weighted_lddt = None
1069 self._lddt_ref = None
1070 self._lddt_mdl = None
1073 self._ref_scorers = None
1074 self._model_penalty = None
1075
1076 @property
1077 def oligo_lddt(self):
1078 """Oligomeric lDDT score.
1079
1080 The score is computed as conserved contacts divided by the total contacts
1081 in the reference using the :attr:`oligo_lddt_scorer`, which uses the full
1082 complex as reference/model structure. If :attr:`penalize_extra_chains` is
1083 True, the reference/model complexes contain all chains (otherwise only the
1084 mapped ones) and additional contacts are added to the reference's total
1085 contacts for unmapped model chains according to the :attr:`chem_mapping`.
1086
1087 The main difference with :attr:`weighted_lddt` is that the lDDT scorer
1088 "sees" the full complex here (incl. inter-chain contacts), while the
1089 weighted single chain score looks at each chain separately.
1090
1091 :getter: Computed on first use (cached)
1092 :type: :class:`float`
1093 """
1094 if self._oligo_lddt is None:
1095 LogInfo('Reference %s has: %s chains' \
1096 % (self.ref.GetName(), self.ref.chain_count))
1097 LogInfo('Model %s has: %s chains' \
1098 % (self.mdl.GetName(), self.mdl.chain_count))
1099
1100 # score with or w/o extra-chain penalty
1101 if self.penalize_extra_chains:
1102 denominator = self.oligo_lddt_scorer.total_contacts
1103 denominator += self._GetModelPenalty()
1104 if denominator > 0:
1105 oligo_lddt = self.oligo_lddt_scorer.conserved_contacts \
1106 / float(denominator)
1107 else:
1108 oligo_lddt = 0.0
1109 else:
1110 oligo_lddt = self.oligo_lddt_scorer.global_score
1111 self._oligo_lddt = oligo_lddt
1112 return self._oligo_lddt
1113
1114 @property
1115 def weighted_lddt(self):
1116 """Weighted average of single chain lDDT scores.
1117
1118 The score is computed as a weighted average of single chain lDDT scores
1119 (see :attr:`sc_lddt_scorers`) using the total contacts of each single
1120 reference chain as weights. If :attr:`penalize_extra_chains` is True,
1121 unmapped chains are added with a 0 score and total contacts taken from
1122 the actual reference chains or (for unmapped model chains) using the
1123 :attr:`chem_mapping`.
1124
1125 See :attr:`oligo_lddt` for a comparison of the two scores.
1126
1127 :getter: Computed on first use (cached)
1128 :type: :class:`float`
1129 """
1130 if self._weighted_lddt is None:
1131 scores = [s.global_score for s in self.sc_lddt_scorers]
1132 weights = [s.total_contacts for s in self.sc_lddt_scorers]
1133 nominator = sum([s * w for s, w in zip(scores, weights)])
1134 if self.penalize_extra_chains:
1135 ref_scorers = self._GetRefScorers()
1136 denominator = sum(s.total_contacts for s in list(ref_scorers.values()))
1137 denominator += self._GetModelPenalty()
1138 else:
1139 denominator = sum(weights)
1140 if denominator > 0:
1141 self._weighted_lddt = nominator / float(denominator)
1142 else:
1143 self._weighted_lddt = 0.0
1144 return self._weighted_lddt
1145
1146 @property
1147 def lddt_ref(self):
1148 """The reference entity used for oligomeric lDDT scoring
1149 (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`).
1150
1151 Since the lDDT computation requires a single chain with mapped residue
1152 numbering, all chains of :attr:`ref` are appended into a single chain X with
1153 unique residue numbers according to the column-index in the alignment. The
1154 alignments are in the same order as they appear in :attr:`alignments`.
1155 Additional residues are appended at the end of the chain with unique residue
1156 numbers. Unmapped chains are only added if :attr:`penalize_extra_chains` is
1157 True. Only CA atoms are considered if :attr:`calpha_only` is True.
1158
1159 :getter: Computed on first use (cached)
1160 :type: :class:`~ost.mol.EntityHandle`
1161 """
1162 if self._lddt_ref is None:
1164 return self._lddt_ref
1165
1166 @property
1167 def lddt_mdl(self):
1168 """The model entity used for oligomeric lDDT scoring
1169 (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`).
1170
1171 Like :attr:`lddt_ref`, this is a single chain X containing all chains of
1172 :attr:`mdl`. The residue numbers match the ones in :attr:`lddt_ref` where
1173 aligned and have unique numbers for additional residues.
1174
1175 :getter: Computed on first use (cached)
1176 :type: :class:`~ost.mol.EntityHandle`
1177 """
1178 if self._lddt_mdl is None:
1180 return self._lddt_mdl
1181
1182 @property
1184 """lDDT Scorer object for :attr:`lddt_ref` and :attr:`lddt_mdl`.
1185
1186 :getter: Computed on first use (cached)
1187 :type: :class:`~ost.mol.alg.lDDTScorer`
1188 """
1189 if self._oligo_lddt_scorer is None:
1191 references=[self.lddt_ref.Select("")],
1192 model=self.lddt_mdl.Select(""),
1193 settings=self.settings)
1194 return self._oligo_lddt_scorer
1195
1196 @property
1198 """List of scorer objects for each chain mapped in :attr:`alignments`.
1199
1200 :getter: Computed on first use (cached)
1201 :type: :class:`list` of :class:`MappedLDDTScorer`
1202 """
1203 if self._mapped_lddt_scorers is None:
1204 self._mapped_lddt_scorers = list()
1205 for aln in self.alignments:
1206 mapped_lddt_scorer = MappedLDDTScorer(aln, self.calpha_only,
1207 self.settings)
1208 self._mapped_lddt_scorers.append(mapped_lddt_scorer)
1209 return self._mapped_lddt_scorers
1210
1211 @property
1213 """List of lDDT scorer objects extracted from :attr:`mapped_lddt_scorers`.
1214
1215 :type: :class:`list` of :class:`~ost.mol.alg.lDDTScorer`
1216 """
1217 return [mls.lddt_scorer for mls in self.mapped_lddt_scorers]
1218
1219 @property
1220 def sc_lddt(self):
1221 """List of global scores extracted from :attr:`sc_lddt_scorers`.
1222
1223 If scoring for a mapped chain fails, an error is displayed and a score of 0
1224 is assigned.
1225
1226 :getter: Computed on first use (cached)
1227 :type: :class:`list` of :class:`float`
1228 """
1229 if self._sc_lddt is None:
1230 self._sc_lddt = list()
1231 for lddt_scorer in self.sc_lddt_scorers:
1232 try:
1233 self._sc_lddt.append(lddt_scorer.global_score)
1234 except Exception as ex:
1235 LogError('Single chain lDDT failed:', str(ex))
1236 self._sc_lddt.append(0.0)
1237 return self._sc_lddt
1238
1239
1242
1244 # simple wrapper to avoid code duplication
1246 self.alignments, self.ref, self.mdl, self.calpha_only,
1248
1249 @staticmethod
1250 def _GetUnmappedMdlChains(mdl, alignments):
1251 # assume model is second sequence in alignment and is named by chain
1252 mdl_chains = set(ch.name for ch in mdl.chains)
1253 mapped_mdl_chains = set(aln.GetSequence(1).GetName() for aln in alignments)
1254 return (mdl_chains - mapped_mdl_chains)
1255
1257 # single chain lddt scorers for each reference chain (key = chain name)
1258 if self._ref_scorers is None:
1259 # collect from mapped_lddt_scorers
1260 ref_scorers = dict()
1261 for mapped_lddt_scorer in self.mapped_lddt_scorers:
1262 ref_ch_name = mapped_lddt_scorer.reference_chain_name
1263 ref_scorers[ref_ch_name] = mapped_lddt_scorer.lddt_scorer
1264 # add new ones where needed
1265 for ch in self.ref.chains:
1266 if ch.name not in ref_scorers:
1267 if self.calpha_only:
1268 ref_chain = ch.Select('aname=CA')
1269 else:
1270 ref_chain = ch.Select('')
1271 ref_scorers[ch.name] = lDDTScorer(
1272 references=[ref_chain],
1273 model=ref_chain,
1274 settings=self.settings)
1275 # store in cache
1276 self._ref_scorers = ref_scorers
1277 # fetch from cache
1278 return self._ref_scorers
1279
1281 # extra value to add to total number of distances for extra model chains
1282 # -> estimated from chem-mapped reference chains
1283 if self._model_penalty is None:
1284 # sanity check
1285 if self.chem_mapping is None:
1286 raise RuntimeError("Must provide chem_mapping when requesting penalty "
1287 "for extra model chains!")
1288 # get cached ref_scorers
1289 ref_scorers = self._GetRefScorers()
1290 # get unmapped model chains
1291 unmapped_mdl_chains = self._GetUnmappedMdlChains(self.mdl, self.alignments)
1292 # map extra chains to ref. chains
1293 model_penalty = 0
1294 for ch_name_mdl in sorted(unmapped_mdl_chains):
1295 # get penalty for chain
1296 cur_penalty = None
1297 for cm_ref, cm_mdl in self.chem_mapping.items():
1298 if ch_name_mdl in cm_mdl:
1299 # penalize by an average of the chem. mapped ref. chains
1300 cur_penalty = 0
1301 for ch_name_ref in cm_ref:
1302 # assumes that total_contacts is cached (for speed)
1303 cur_penalty += ref_scorers[ch_name_ref].total_contacts
1304 cur_penalty /= float(len(cm_ref))
1305 break
1306 # report penalty
1307 if cur_penalty is None:
1308 LogWarning('Extra MODEL chain %s could not be chemically mapped to '
1309 'any chain in REFERENCE, lDDT cannot consider it!' \
1310 % ch_name_mdl)
1311 else:
1312 LogScript('Extra MODEL chain %s added to lDDT score by considering '
1313 'chemically mapped chains in REFERENCE.' % ch_name_mdl)
1314 model_penalty += cur_penalty
1315 # store in cache
1316 self._model_penalty = model_penalty
1317 # fetch from cache
1318 return self._model_penalty
1319
1320
1321class MappedLDDTScorer(object):
1322 """A simple class to calculate a single-chain lDDT score on a given chain to
1323 chain mapping as extracted from :class:`OligoLDDTScorer`.
1324
1325 :param alignment: Sets :attr:`alignment`
1326 :param calpha_only: Sets :attr:`calpha_only`
1327 :param settings: Sets :attr:`settings`
1328
1329 .. attribute:: alignment
1330
1331 Alignment with two sequences named according to the mapped chains and with
1332 views attached to both sequences (e.g. one of the items of
1333 :attr:`QSscorer.alignments`).
1334
1335 The first sequence is assumed to be the reference and the second one the
1336 model. Since the lDDT score is not symmetric (extra residues in model are
1337 ignored), the order is important.
1338
1339 :type: :class:`~ost.seq.AlignmentHandle`
1340
1341 .. attribute:: calpha_only
1342
1343 If True, restricts lDDT score to CA only.
1344
1345 :type: :class:`bool`
1346
1347 .. attribute:: settings
1348
1349 Settings to use for lDDT scoring.
1350
1351 :type: :class:`~ost.mol.alg.lDDTSettings`
1352
1353 .. attribute:: lddt_scorer
1354
1355 lDDT Scorer object for the given chains.
1356
1357 :type: :class:`~ost.mol.alg.lDDTScorer`
1358
1359 .. attribute:: reference_chain_name
1360
1361 Chain name of the reference.
1362
1363 :type: :class:`str`
1364
1365 .. attribute:: model_chain_name
1366
1367 Chain name of the model.
1368
1369 :type: :class:`str`
1370 """
1371 def __init__(self, alignment, calpha_only, settings):
1372 # prepare fields
1373 self.alignment = alignment
1374 self.calpha_only = calpha_only
1375 self.settings = settings
1376 self.lddt_scorer = None # set in _InitScorer
1377 self.reference_chain_name = alignment.sequences[0].name
1378 self.model_chain_name = alignment.sequences[1].name
1379 self._old_number_label = "old_num"
1380 self._extended_alignment = None # set in _InitScorer
1381 # initialize lDDT scorer
1382 self._InitScorer()
1383
1385 """
1386 :return: Scores for each residue
1387 :rtype: :class:`list` of :class:`dict` with one item for each residue
1388 existing in model and reference:
1389
1390 - "residue_number": Residue number in reference chain
1391 - "residue_name": Residue name in reference chain
1392 - "lddt": local lDDT
1393 - "conserved_contacts": number of conserved contacts
1394 - "total_contacts": total number of contacts
1395 """
1396 scores = list()
1397 assigned_residues = list()
1398 # Make sure the score is calculated
1399 self.lddt_scorer.global_score
1400 for col in self._extended_alignment:
1401 if col[0] != "-" and col.GetResidue(3).IsValid():
1402 ref_res = col.GetResidue(0)
1403 mdl_res = col.GetResidue(1)
1404 ref_res_renum = col.GetResidue(2)
1405 mdl_res_renum = col.GetResidue(3)
1406 if ref_res.one_letter_code != ref_res_renum.one_letter_code:
1407 raise RuntimeError("Reference residue name mapping inconsistent: %s != %s" %
1408 (ref_res.one_letter_code,
1409 ref_res_renum.one_letter_code))
1410 if mdl_res.one_letter_code != mdl_res_renum.one_letter_code:
1411 raise RuntimeError("Model residue name mapping inconsistent: %s != %s" %
1412 (mdl_res.one_letter_code,
1413 mdl_res_renum.one_letter_code))
1414 if ref_res.GetNumber().num != ref_res_renum.GetIntProp(self._old_number_label):
1415 raise RuntimeError("Reference residue number mapping inconsistent: %s != %s" %
1416 (ref_res.GetNumber().num,
1417 ref_res_renum.GetIntProp(self._old_number_label)))
1418 if mdl_res.GetNumber().num != mdl_res_renum.GetIntProp(self._old_number_label):
1419 raise RuntimeError("Model residue number mapping inconsistent: %s != %s" %
1420 (mdl_res.GetNumber().num,
1421 mdl_res_renum.GetIntProp(self._old_number_label)))
1422 if ref_res.qualified_name in assigned_residues:
1423 raise RuntimeError("Duplicated residue in reference: " %
1424 (ref_res.qualified_name))
1425 else:
1426 assigned_residues.append(ref_res.qualified_name)
1427 # check if property there (may be missing for CA-only)
1428 if mdl_res_renum.HasProp(self.settings.label):
1429 scores.append({
1430 "residue_number": ref_res.GetNumber().num,
1431 "residue_name": ref_res.name,
1432 "lddt": mdl_res_renum.GetFloatProp(self.settings.label),
1433 "conserved_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_conserved"),
1434 "total_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_total")})
1435 return scores
1436
1437
1440
1441 def _InitScorer(self):
1442 # Use copy of alignment (extended by 2 extra sequences for renumbering)
1443 aln = self.alignment.Copy()
1444 # Get chains and renumber according to alignment (for lDDT)
1445 reference = Renumber(
1446 aln.GetSequence(0),
1447 old_number_label=self._old_number_label).CreateFullView()
1448 refseq = seq.CreateSequence(
1449 "reference_renumbered",
1450 aln.GetSequence(0).GetString())
1451 refseq.AttachView(reference)
1452 aln.AddSequence(refseq)
1453 model = Renumber(
1454 aln.GetSequence(1),
1455 old_number_label=self._old_number_label).CreateFullView()
1456 modelseq = seq.CreateSequence(
1457 "model_renumbered",
1458 aln.GetSequence(1).GetString())
1459 modelseq.AttachView(model)
1460 aln.AddSequence(modelseq)
1461 # Filter to CA-only if desired (done after AttachView to not mess it up)
1462 if self.calpha_only:
1463 self.lddt_scorer = lDDTScorer(
1464 references=[reference.Select('aname=CA')],
1465 model=model.Select('aname=CA'),
1466 settings=self.settings)
1467 else:
1468 self.lddt_scorer = lDDTScorer(
1469 references=[reference],
1470 model=model,
1471 settings=self.settings)
1472 # Store alignment for later
1473 self._extended_alignment = aln
1474
1475
1478
1479# general
1480
1481def _AlignAtomSeqs(seq_1, seq_2):
1482 """
1483 :type seq_1: :class:`ost.seq.SequenceHandle`
1484 :type seq_2: :class:`ost.seq.SequenceHandle`
1485 :return: Alignment of two sequences using a global alignment. Views attached
1486 to the input sequences will remain attached in the aln.
1487 :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
1488 """
1489 # NOTE: If the two sequence have a greatly different length
1490 # a local alignment could be a better choice...
1491 aln = None
1492 alns = seq.alg.GlobalAlign(seq_1, seq_2, seq.alg.BLOSUM62)
1493 if alns: aln = alns[0]
1494 if not aln:
1495 LogWarning('Failed to align %s to %s' % (seq_1.name, seq_2.name))
1496 LogWarning('%s: %s' % (seq_1.name, seq_1.string))
1497 LogWarning('%s: %s' % (seq_2.name, seq_2.string))
1498 return aln
1499
1501 """
1502 :return: String to be used with Select(cname=<RETURN>). Takes care of joining
1503 and putting quotation marks where needed.
1504 :rtype: :class:`str`
1505 :param ch_names: Some iterable list of chain names (:class:`str` items).
1506 """
1507 return ','.join(mol.QueryQuoteName(ch_name) for ch_name in ch_names)
1508
1509# QS entity
1510
1512 """
1513 :param ent: The OST entity to be cleaned.
1514 :type ent: :class:`EntityHandle` or :class:`EntityView`
1515 :return: A tuple of 3 items: :attr:`QSscoreEntity.ent`,
1516 :attr:`QSscoreEntity.removed_chains`,
1517 :attr:`QSscoreEntity.calpha_only`
1518 """
1519 # find chains to remove
1520 removed_chains = []
1521 for ch in ent.chains:
1522 # we remove chains if they are small-peptides or if the contain no aa
1523 # or if they contain only unknown or modified residues
1524 if ch.name in ['-', '_'] \
1525 or ch.residue_count < 20 \
1526 or not any(r.chem_type.IsAminoAcid() for r in ch.residues) \
1527 or not (set(r.one_letter_code for r in ch.residues) - {'?', 'X'}):
1528 removed_chains.append(ch.name)
1529
1530 # remove them from *ent*
1531 if removed_chains:
1532 view = ent.Select('cname!=%s' % _FixSelectChainNames(set(removed_chains)))
1533 ent_new = mol.CreateEntityFromView(view, True)
1534 ent_new.SetName(ent.GetName())
1535 else:
1536 ent_new = ent
1537
1538 # check if CA only
1539 calpha_only = False
1540 if ent_new.atom_count > 0 and ent_new.Select('aname=CB').atom_count == 0:
1541 LogInfo('Structure %s is a CA only structure!' % ent_new.GetName())
1542 calpha_only = True
1543
1544 # report and return
1545 if removed_chains:
1546 LogInfo('Chains removed from %s: %s' \
1547 % (ent_new.GetName(), ''.join(removed_chains)))
1548 LogInfo('Chains in %s: %s' \
1549 % (ent_new.GetName(), ''.join([c.name for c in ent_new.chains])))
1550 return ent_new, removed_chains, calpha_only
1551
1553 """
1554 :param ent: Entity to process
1555 :type ent: :class:`EntityHandle` or :class:`EntityView`
1556 :return: New entity with only CA and only one atom per residue
1557 (see :attr:`QSscoreEntity.ca_entity`)
1558 """
1559 # cook up CA only view (diff from Select = guaranteed 1 atom per residue)
1560 ca_view = ent.CreateEmptyView()
1561 # add chain by chain
1562 for res in ent.residues:
1563 ca_atom = res.FindAtom("CA")
1564 if ca_atom.IsValid():
1565 ca_view.AddAtom(ca_atom)
1566 # finalize
1567 return mol.CreateEntityFromView(ca_view, False)
1568
1569def _GetChemGroups(qs_ent, seqid_thr=95.):
1570 """
1571 :return: Intra-complex group of chemically identical polypeptide chains
1572 (see :attr:`QSscoreEntity.chem_groups`)
1573
1574 :param qs_ent: Entity to process
1575 :type qs_ent: :class:`QSscoreEntity`
1576 :param seqid_thr: Threshold used to decide when two chains are identical.
1577 95 percent tolerates the few mutations crystallographers
1578 like to do.
1579 :type seqid_thr: :class:`float`
1580 """
1581 # get data from qs_ent
1582 ca_chains = qs_ent.ca_chains
1583 chain_names = sorted(ca_chains.keys())
1584 # get pairs of identical chains
1585 # NOTE: this scales quadratically with number of chains and may be optimized
1586 # -> one could merge it with "merge transitive pairs" below...
1587 id_seqs = []
1588 for ch_1, ch_2 in itertools.combinations(chain_names, 2):
1589 aln = qs_ent.GetAlignment(ch_1, ch_2)
1590 if aln and seq.alg.SequenceIdentity(aln) > seqid_thr:
1591 id_seqs.append((ch_1, ch_2))
1592 # trivial case: no matching pairs
1593 if not id_seqs:
1594 return [[name] for name in chain_names]
1595
1596 # merge transitive pairs
1597 groups = []
1598 for ch_1, ch_2 in id_seqs:
1599 found = False
1600 for g in groups:
1601 if ch_1 in g or ch_2 in g:
1602 found = True
1603 g.add(ch_1)
1604 g.add(ch_2)
1605 if not found:
1606 groups.append(set([ch_1, ch_2]))
1607 # sort internally based on sequence length
1608 chem_groups = []
1609 for g in groups:
1610 ranked_g = sorted([(-ca_chains[ch].length, ch) for ch in g])
1611 chem_groups.append([ch for _,ch in ranked_g])
1612 # add other dissimilar chains
1613 for ch in chain_names:
1614 if not any(ch in g for g in chem_groups):
1615 chem_groups.append([ch])
1616
1617 return chem_groups
1618
1620 """Computes the Euler angles given a transformation matrix.
1621
1622 :param Rt: Rt operator.
1623 :type Rt: :class:`ost.geom.Mat4`
1624 :return: A :class:`tuple` of angles for each axis (x,y,z)
1625 """
1626 rot = np.asarray(Rt.ExtractRotation().data).reshape(3,3)
1627 tx = np.arctan2(rot[2,1], rot[2,2])
1628 if tx < 0:
1629 tx += 2*np.pi
1630 ty = np.arctan2(rot[2,0], np.sqrt(rot[2,1]**2 + rot[2,2]**2))
1631 if ty < 0:
1632 ty += 2*np.pi
1633 tz = np.arctan2(rot[1,0], rot[0,0])
1634 if tz < 0:
1635 tz += 2*np.pi
1636 return tx,ty,tz
1637
1638# QS scorer
1639
1640def _GetChemGroupsMapping(qs_ent_1, qs_ent_2):
1641 """
1642 :return: Inter-complex mapping of chemical groups
1643 (see :attr:`QSscorer.chem_mapping`)
1644
1645 :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1646 :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1647 """
1648 # get chem. groups and unique representative
1649 chem_groups_1 = qs_ent_1.chem_groups
1650 chem_groups_2 = qs_ent_2.chem_groups
1651 repr_chains_1 = {x[0]: tuple(x) for x in chem_groups_1}
1652 repr_chains_2 = {x[0]: tuple(x) for x in chem_groups_2}
1653
1654 # if entities do not have different number of unique chains, we get the
1655 # mapping for the smaller set
1656 swapped = False
1657 if len(repr_chains_2) < len(repr_chains_1):
1658 repr_chains_1, repr_chains_2 = repr_chains_2, repr_chains_1
1659 qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
1660 swapped = True
1661
1662 # find the closest to each chain between the two entities
1663 # NOTE: this may still be sensible to orthology problem
1664 # -> currently we use a global alignment and seq. id. to rank pairs
1665 # -> we also tried local alignments and weighting the seq. id. by the
1666 # coverages of the alignments (gapless string in aln. / seq. length)
1667 # but global aln performed better...
1668 chain_pairs = []
1669 ca_chains_1 = qs_ent_1.ca_chains
1670 ca_chains_2 = qs_ent_2.ca_chains
1671 for ch_1 in list(repr_chains_1.keys()):
1672 for ch_2 in list(repr_chains_2.keys()):
1673 aln = _AlignAtomSeqs(ca_chains_1[ch_1], ca_chains_2[ch_2])
1674 if aln:
1675 chains_seqid = seq.alg.SequenceIdentity(aln)
1676 LogVerbose('Sequence identity', ch_1, ch_2, 'seqid=%.2f' % chains_seqid)
1677 chain_pairs.append((chains_seqid, ch_1, ch_2))
1678
1679 # get top matching groups first
1680 chain_pairs = sorted(chain_pairs, reverse=True)
1681 chem_mapping = {}
1682 for _, c1, c2 in chain_pairs:
1683 skip = False
1684 for a,b in chem_mapping.items():
1685 if repr_chains_1[c1] == a or repr_chains_2[c2] == b:
1686 skip = True
1687 break
1688 if not skip:
1689 chem_mapping[repr_chains_1[c1]] = repr_chains_2[c2]
1690 if swapped:
1691 chem_mapping = {y: x for x, y in chem_mapping.items()}
1692 qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
1693
1694 # notify chains without partner
1695 mapped_1 = set([i for s in list(chem_mapping.keys()) for i in s])
1696 chains_1 = set([c.name for c in qs_ent_1.ent.chains])
1697 if chains_1 - mapped_1:
1698 LogWarning('Unmapped Chains in %s: %s'
1699 % (qs_ent_1.GetName(), ','.join(list(chains_1 - mapped_1))))
1700
1701 mapped_2 = set([i for s in list(chem_mapping.values()) for i in s])
1702 chains_2 = set([c.name for c in qs_ent_2.ent.chains])
1703 if chains_2 - mapped_2:
1704 LogWarning('Unmapped Chains in %s: %s'
1705 % (qs_ent_2.GetName(), ','.join(list(chains_2 - mapped_2))))
1706
1707 # check if we have any chains left
1708 LogInfo('Chemical chain-groups mapping: ' + str(chem_mapping))
1709 if len(mapped_1) < 1 or len(mapped_2) < 1:
1710 raise QSscoreError('Less than 1 chains left in chem_mapping.')
1711 return chem_mapping
1712
1713def _SelectFew(l, max_elements):
1714 """Return l or copy of l with at most *max_elements* entries."""
1715 n_el = len(l)
1716 if n_el <= max_elements:
1717 return l
1718 else:
1719 # cheap integer ceiling (-1 to ensure that x*max_elements gets d_el = x)
1720 d_el = ((n_el - 1) // max_elements) + 1
1721 new_l = list()
1722 for i in range(0, n_el, d_el):
1723 new_l.append(l[i])
1724 return new_l
1725
1726def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain,
1727 clustalw_bin):
1728 """
1729 :return: Tuple of two :class:`~ost.mol.EntityView` objects containing subsets
1730 of *qs_ent_1* and *qs_ent_2*. Two entities are later created from
1731 those views (see :attr:`QSscorer.ent_to_cm_1` and
1732 :attr:`QSscorer.ent_to_cm_2`)
1733
1734 :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1735 :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1736 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1737 :param max_ca_per_chain: See :attr:`QSscorer.max_ca_per_chain_for_cm`
1738 """
1739 # make sure name doesn't contain spaces and is unique
1740 def _FixName(seq_name, seq_names):
1741 # get rid of spaces and make it unique
1742 seq_name = seq_name.replace(' ', '-')
1743 while seq_name in seq_names:
1744 seq_name += '-'
1745 return seq_name
1746 # resulting views into CA entities using CA chain sequences
1747 ent_view_1 = qs_ent_1.ca_entity.CreateEmptyView()
1748 ent_view_2 = qs_ent_2.ca_entity.CreateEmptyView()
1749 ca_chains_1 = qs_ent_1.ca_chains
1750 ca_chains_2 = qs_ent_2.ca_chains
1751 # go through all mapped chemical groups
1752 for group_1, group_2 in chem_mapping.items():
1753 # MSA with ClustalW
1754 seq_list = seq.CreateSequenceList()
1755 # keep sequence-name (must be unique) to view mapping
1756 seq_to_empty_view = dict()
1757 for ch in group_1:
1758 sequence = ca_chains_1[ch].Copy()
1759 sequence.name = _FixName(qs_ent_1.GetName() + '.' + ch, seq_to_empty_view)
1760 seq_to_empty_view[sequence.name] = ent_view_1
1761 seq_list.AddSequence(sequence)
1762 for ch in group_2:
1763 sequence = ca_chains_2[ch].Copy()
1764 sequence.name = _FixName(qs_ent_2.GetName() + '.' + ch, seq_to_empty_view)
1765 seq_to_empty_view[sequence.name] = ent_view_2
1766 seq_list.AddSequence(sequence)
1767 alnc = ClustalW(seq_list, clustalw=clustalw_bin)
1768
1769 # collect aligned residues (list of lists of sequence_count valid residues)
1770 residue_lists = list()
1771 sequence_count = alnc.sequence_count
1772 for col in alnc:
1773 residues = [col.GetResidue(ir) for ir in range(sequence_count)]
1774 if all([r.IsValid() for r in residues]):
1775 residue_lists.append(residues)
1776 # check number of aligned residues
1777 if len(residue_lists) < 5:
1778 raise QSscoreError('Not enough aligned residues.')
1779 elif max_ca_per_chain > 0:
1780 residue_lists = _SelectFew(residue_lists, max_ca_per_chain)
1781 # check what view is for which residue
1782 res_views = [seq_to_empty_view[alnc.sequences[ir].name] \
1783 for ir in range(sequence_count)]
1784 # add to view (note: only 1 CA atom per residue in here)
1785 for res_list in residue_lists:
1786 for res, view in zip(res_list, res_views):
1787 view.AddResidue(res, mol.ViewAddFlag.INCLUDE_ATOMS)
1788 # done
1789 return ent_view_1, ent_view_2
1790
1791
1792def _FindSymmetry(qs_ent_1, qs_ent_2, ent_to_cm_1, ent_to_cm_2, chem_mapping):
1793 """
1794 :return: A pair of comparable symmetry groups (for :attr:`QSscorer.symm_1`
1795 and :attr:`QSscorer.symm_2`) between the two structures.
1796 Empty lists if no symmetry identified.
1797
1798 :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1799 :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1800 :param ent_to_cm_1: See :attr:`QSscorer.ent_to_cm_1`
1801 :param ent_to_cm_2: See :attr:`QSscorer.ent_to_cm_2`
1802 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1803 """
1804 LogInfo('Identifying Symmetry Groups...')
1805
1806 # get possible symmetry groups
1807 symm_subg_1 = _GetSymmetrySubgroups(qs_ent_1, ent_to_cm_1,
1808 list(chem_mapping.keys()))
1809 symm_subg_2 = _GetSymmetrySubgroups(qs_ent_2, ent_to_cm_2,
1810 list(chem_mapping.values()))
1811
1812 # check combination of groups
1813 LogInfo('Selecting Symmetry Groups...')
1814 # check how many mappings are to be checked for each pair of symmetry groups
1815 # -> lower number here will speed up worst-case runtime of _GetChainMapping
1816 # NOTE: this is tailored to speed up brute force all vs all mapping test
1817 # for preferred _CheckClosedSymmetry this is suboptimal!
1818 best_symm = []
1819 for symm_1, symm_2 in itertools.product(symm_subg_1, symm_subg_2):
1820 s1 = symm_1[0]
1821 s2 = symm_2[0]
1822 if len(s1) != len(s2):
1823 LogDebug('Discarded', str(symm_1), str(symm_2),
1824 ': different size of symmetry groups')
1825 continue
1826 count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
1827 nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
1828 LogDebug('OK', str(symm_1), str(symm_2), ': %s mappings' % nr_mapp)
1829 best_symm.append((nr_mapp, symm_1, symm_2))
1830
1831 for _, symm_1, symm_2 in sorted(best_symm):
1832 s1 = symm_1[0]
1833 s2 = symm_2[0]
1834 group_1 = ent_to_cm_1.Select('cname=%s' % _FixSelectChainNames(s1))
1835 group_2 = ent_to_cm_2.Select('cname=%s' % _FixSelectChainNames(s2))
1836 # check if by superposing a pair of chains within the symmetry group to
1837 # superpose all chains within the symmetry group
1838 # -> if successful, the symmetry groups are compatible
1839 closed_symm = _CheckClosedSymmetry(group_1, group_2, symm_1, symm_2,
1840 chem_mapping, 6, 0.8, find_best=False)
1841
1842 if closed_symm:
1843 return symm_1, symm_2
1844
1845 # nothing found
1846 LogInfo('No coherent symmetry identified between structures')
1847 return [], []
1848
1849
1850def _GetChainMapping(ent_1, ent_2, symm_1, symm_2, chem_mapping,
1851 max_mappings_extensive):
1852 """
1853 :return: Tuple with mapping from *ent_1* to *ent_2* (see
1854 :attr:`QSscorer.chain_mapping`) and scheme used (see
1855 :attr:`QSscorer.chain_mapping_scheme`)
1856
1857 :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
1858 :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
1859 :param symm_1: See :attr:`QSscorer.symm_1`
1860 :param symm_2: See :attr:`QSscorer.symm_2`
1861 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1862 :param max_mappings_extensive: See :attr:`QSscorer.max_mappings_extensive`
1863 """
1864 LogInfo('Symmetry-groups used in %s: %s' % (ent_1.GetName(), str(symm_1)))
1865 LogInfo('Symmetry-groups used in %s: %s' % (ent_2.GetName(), str(symm_2)))
1866
1867 # quick check for closed symmetries
1868 thresholds = [( 'strict', 4, 0.8),
1869 ( 'tolerant', 6, 0.4),
1870 ('permissive', 8, 0.2)]
1871 for scheme, sup_thr, sup_fract in thresholds:
1872 # check if by superposing a pair of chains within the symmetry group to
1873 # superpose all chains of the two oligomers
1874 # -> this also returns the resulting mapping of chains
1875 mapping = _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2,
1876 chem_mapping, sup_thr, sup_fract)
1877 if mapping:
1878 LogInfo('Closed Symmetry with %s parameters' % scheme)
1879 if scheme == 'permissive':
1880 LogWarning('Permissive thresholds used for overlap based mapping ' + \
1881 'detection: check mapping manually: %s' % mapping)
1882 return mapping, scheme
1883
1884 # NOTE that what follows below is sub-optimal:
1885 # - if the two structures don't fit at all, we may map chains rather randomly
1886 # - we may need a lot of operations to find globally "best" mapping
1887 # -> COST = O(N^2) * O(N!)
1888 # (first = all pairwise chain-superpose, latter = all chain mappings)
1889 # -> a greedy chain mapping choice algo (pick best RMSD for each one-by-one)
1890 # could reduce this to O(N^2) * O(N^2) but it would need to be validated
1891 # - one could also try some other heuristic based on center of atoms of chains
1892 # -> at this point we get a bad mapping anyways...
1893
1894 # if we get here, we will need to check many combinations of mappings
1895 # -> check how many mappings must be checked and limit
1896 count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
1897 LogInfo('Intra Symmetry-group mappings to check: %s' \
1898 % count['intra']['mappings'])
1899 LogInfo('Inter Symmetry-group mappings to check: %s' \
1900 % count['inter']['mappings'])
1901 nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
1902 if nr_mapp > max_mappings_extensive:
1903 raise QSscoreError('Too many possible mappings: %s' % nr_mapp)
1904
1905 # to speed up the computations we cache chain views and RMSDs
1906 cached_rmsd = _CachedRMSD(ent_1, ent_2)
1907
1908 # get INTRA symmetry group chain mapping
1909 check = 0
1910 intra_mappings = [] # list of (RMSD, c1, c2, mapping) tuples (best superpose)
1911 # limit chem mapping to reference symmetry group
1912 ref_symm_1 = symm_1[0]
1913 ref_symm_2 = symm_2[0]
1914 asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
1915 # for each chemically identical group
1916 for g1, g2 in asu_chem_mapping.items():
1917 # try to superpose all possible pairs
1918 for c1, c2 in itertools.product(g1, g2):
1919 # get superposition transformation
1920 LogVerbose('Superposing chains: %s, %s' % (c1, c2))
1921 res = cached_rmsd.GetSuperposition(c1, c2)
1922 # compute RMSD of possible mappings
1923 cur_mappings = [] # list of (RMSD, mapping) tuples
1924 for mapping in _ListPossibleMappings(c1, c2, asu_chem_mapping):
1925 check += 1
1926 chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
1927 cur_mappings.append((chains_rmsd, mapping))
1928 LogVerbose(str(mapping), str(chains_rmsd))
1929 best_rmsd, best_mapp = min(cur_mappings)
1930 intra_mappings.append((best_rmsd, c1, c2, best_mapp))
1931 # best chain-chain superposition defines the intra asu mapping
1932 _, intra_asu_c1, intra_asu_c2, intra_asu_mapp = min(intra_mappings)
1933
1934 # if only one asu is present in any of the complexes, we're done
1935 if len(symm_1) == 1 or len(symm_2) == 1:
1936 mapping = intra_asu_mapp
1937 else:
1938 # the mapping is the element position within the asu chain list
1939 # -> this speed up a lot, assuming that the order of chain in asu groups
1940 # is following the order of symmetry operations
1941 index_asu_c1 = ref_symm_1.index(intra_asu_c1)
1942 index_asu_c2 = ref_symm_2.index(intra_asu_c2)
1943 index_mapp = {ref_symm_1.index(s1): ref_symm_2.index(s2) \
1944 for s1, s2 in intra_asu_mapp.items()}
1945 LogInfo('Intra symmetry-group mapping: %s' % str(intra_asu_mapp))
1946
1947 # get INTER symmetry group chain mapping
1948 # we take the first symmetry group from the complex having the most
1949 if len(symm_1) < len(symm_2):
1950 symm_combinations = list(itertools.product(symm_1, [symm_2[0]]))
1951 else:
1952 symm_combinations = list(itertools.product([symm_1[0]], symm_2))
1953
1954 full_asu_mapp = {tuple(symm_1): tuple(symm_2)}
1955 full_mappings = [] # list of (RMSD, mapping) tuples
1956 for s1, s2 in symm_combinations:
1957 # get superposition transformation (we take best chain-pair in asu)
1958 LogVerbose('Superposing symmetry-group: %s, %s in %s, %s' \
1959 % (s1[index_asu_c1], s2[index_asu_c2], s1, s2))
1960 res = cached_rmsd.GetSuperposition(s1[index_asu_c1], s2[index_asu_c2])
1961 # compute RMSD of possible mappings
1962 for asu_mapp in _ListPossibleMappings(s1, s2, full_asu_mapp):
1963 check += 1
1964 # need to extract full chain mapping (use indexing)
1965 mapping = {}
1966 for a, b in asu_mapp.items():
1967 for id_a, id_b in index_mapp.items():
1968 mapping[a[id_a]] = b[id_b]
1969 chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
1970 full_mappings.append((chains_rmsd, mapping))
1971 LogVerbose(str(mapping), str(chains_rmsd))
1972
1973 if check != nr_mapp:
1974 LogWarning('Mapping number estimation was wrong') # sanity check
1975
1976 # return best (lowest RMSD) mapping
1977 mapping = min(full_mappings, key=lambda x: x[0])[1]
1978
1979 LogWarning('Extensive search used for mapping detection (fallback). This ' + \
1980 'approach has known limitations. Check mapping manually: %s' \
1981 % mapping)
1982 return mapping, 'extensive'
1983
1984
1985def _GetSymmetrySubgroups(qs_ent, ent, chem_groups):
1986 """
1987 Identify the symmetry (either cyclic C or dihedral D) of the protein and find
1988 all possible symmetry subgroups. This is done testing all combination of chain
1989 superposition and clustering them by the angles (D) or the axis (C) of the Rt
1990 operator.
1991
1992 Groups of superposition which can fully reconstruct the structure are possible
1993 symmetry subgroups.
1994
1995 :param qs_ent: Entity with cached angles and axis.
1996 :type qs_ent: :class:`QSscoreEntity`
1997 :param ent: Entity from which to extract chains (perfect alignment assumed
1998 for superposition as in :attr:`QSscorer.ent_to_cm_1`).
1999 :type ent: :class:`EntityHandle` or :class:`EntityView`
2000 :param chem_groups: List of tuples/lists of chain names in *ent*. Each list
2001 contains all chains belonging to a chem. group (could be
2002 from :attr:`QSscoreEntity.chem_groups` or from "keys()"
2003 or "values()" of :attr:`QSscorer.chem_mapping`)
2004
2005 :return: A list of possible symmetry subgroups (each in same format as
2006 :attr:`QSscorer.symm_1`). If no symmetry is found, we return a list
2007 with a single symmetry subgroup with a single group with all chains.
2008 """
2009 # get angles / axis for pairwise transformations within same chem. group
2010 angles = {}
2011 axis = {}
2012 for cnames in chem_groups:
2013 for c1, c2 in itertools.combinations(cnames, 2):
2014 cur_angles = qs_ent.GetAngles(c1, c2)
2015 if not np.any(np.isnan(cur_angles)):
2016 angles[(c1,c2)] = cur_angles
2017 cur_axis = qs_ent.GetAxis(c1, c2)
2018 if not np.any(np.isnan(cur_axis)):
2019 axis[(c1,c2)] = cur_axis
2020
2021 # cluster superpositions angles at different thresholds
2022 symm_groups = []
2023 LogVerbose('Possible symmetry-groups for: %s' % ent.GetName())
2024 for angle_thr in np.arange(0.1, 1, 0.1):
2025 d_symm_groups = _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr)
2026 if d_symm_groups:
2027 for group in d_symm_groups:
2028 if group not in symm_groups:
2029 symm_groups.append(group)
2030 LogVerbose('Dihedral: %s' % group)
2031 LogInfo('Symmetry threshold %.1f used for angles of %s' \
2032 % (angle_thr, ent.GetName()))
2033 break
2034
2035 # cluster superpositions axis at different thresholds
2036 for axis_thr in np.arange(0.1, 1, 0.1):
2037 c_symm_groups = _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr)
2038 if c_symm_groups:
2039 for group in c_symm_groups:
2040 if group not in symm_groups:
2041 symm_groups.append(group)
2042 LogVerbose('Cyclic: %s' % group)
2043 LogInfo('Symmetry threshold %.1f used for axis of %s' \
2044 % (axis_thr, ent.GetName()))
2045 break
2046
2047 # fall back to single "group" containing all chains if none found
2048 if not symm_groups:
2049 LogInfo('No symmetry-group detected in %s' % ent.GetName())
2050 symm_groups = [[tuple([c for g in chem_groups for c in g])]]
2051
2052 return symm_groups
2053
2054def _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr):
2055 """
2056 :return: Dihedral subgroups for :func:`_GetSymmetrySubgroups`
2057 (same return type as there). Empty list if fail.
2058
2059 :param ent: See :func:`_GetSymmetrySubgroups`
2060 :param chem_groups: See :func:`_GetSymmetrySubgroups`
2061 :param angles: :class:`dict` (key = chain-pair-tuple, value = Euler angles)
2062 :param angle_thr: Euler angles distance threshold for clustering (float).
2063 """
2064 # cluster superpositions angles
2065 if len(angles) == 0:
2066 # nothing to be done here
2067 return []
2068 else:
2069 same_angles = _ClusterData(angles, angle_thr, _AngleArrayDistance)
2070
2071 # get those which are non redundant and covering all chains
2072 symm_groups = []
2073 for clst in list(same_angles.values()):
2074 group = list(clst.keys())
2075 if _ValidChainGroup(group, ent):
2076 if len(chem_groups) > 1:
2077 # if hetero, we want to group toghether different chains only
2078 symm_groups.append(list(zip(*group)))
2079 else:
2080 # if homo, we also want pairs
2081 symm_groups.append(group)
2082 symm_groups.append(list(zip(*group)))
2083 return symm_groups
2084
2085def _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr):
2086 """
2087 :return: Cyclic subgroups for :func:`_GetSymmetrySubgroups`
2088 (same return type as there). Empty list if fail.
2089
2090 :param ent: See :func:`_GetSymmetrySubgroups`
2091 :param chem_groups: See :func:`_GetSymmetrySubgroups`
2092 :param angles: :class:`dict` (key = chain-pair-tuple, value = rotation axis)
2093 :param angle_thr: Axis distance threshold for clustering (float).
2094 """
2095 # cluster superpositions axis
2096 if len(axis) == 0:
2097 # nothing to be done here
2098 return []
2099 else:
2100 same_axis = _ClusterData(axis, axis_thr, _AxisDistance)
2101
2102 # use to get grouping
2103 symm_groups = []
2104 for clst in list(same_axis.values()):
2105 all_chain = [item for sublist in list(clst.keys()) for item in sublist]
2106 if len(set(all_chain)) == ent.chain_count:
2107 # if we have an hetero we can exploit cyclic symmetry for grouping
2108 if len(chem_groups) > 1:
2109 ref_chem_group = chem_groups[0]
2110 # try two criteria for grouping
2111 cyclic_group_closest = []
2112 cyclic_group_iface = []
2113 for c1 in ref_chem_group:
2114 group_closest = [c1]
2115 group_iface = [c1]
2116 for chains in chem_groups[1:]:
2117 # center of atoms distance
2118 closest = _GetClosestChain(ent, c1, chains)
2119 # chain with bigger interface
2120 closest_iface = _GetClosestChainInterface(ent, c1, chains)
2121 group_closest.append(closest)
2122 group_iface.append(closest_iface)
2123 cyclic_group_closest.append(tuple(group_closest))
2124 cyclic_group_iface.append(tuple(group_iface))
2125 if _ValidChainGroup(cyclic_group_closest, ent):
2126 symm_groups.append(cyclic_group_closest)
2127 if _ValidChainGroup(cyclic_group_iface, ent):
2128 symm_groups.append(cyclic_group_iface)
2129 # if it is a homo, there's not much we can group
2130 else:
2131 symm_groups.append(chem_groups)
2132 return symm_groups
2133
2134def _ClusterData(data, thr, metric):
2135 """Wrapper for fclusterdata to get dict of clusters.
2136
2137 :param data: :class:`dict` (keys for ID, values used for clustering)
2138 :return: :class:`dict` {cluster_idx: {data-key: data-value}}
2139 """
2140 # special case length 1
2141 if len(data) == 1:
2142 return {0: {list(data.keys())[0]: list(data.values())[0]} }
2143 # do the clustering
2144 cluster_indices = fclusterdata(np.asarray(list(data.values())), thr,
2145 method='complete', criterion='distance',
2146 metric=metric)
2147 # fclusterdata output is cluster ids -> put into dict of clusters
2148 res = {}
2149 for cluster_idx, data_key in zip(cluster_indices, list(data.keys())):
2150 if not cluster_idx in res:
2151 res[cluster_idx] = {}
2152 res[cluster_idx][data_key] = data[data_key]
2153 return res
2154
2156 """
2157 :return: Average angular distance of two arrays of angles.
2158 :param u: Euler angles.
2159 :param v: Euler angles.
2160 """
2161 dist = []
2162 for a,b in zip(u,v):
2163 delta = abs(a-b)
2164 if delta > np.pi:
2165 delta = abs(2*np.pi - delta)
2166 dist.append(delta)
2167 return np.mean(dist)
2168
2170 """
2171 :return: Euclidean distance between two rotation axes. Axes can point in
2172 either direction so we ensure to use the closer one.
2173 :param u: Rotation axis.
2174 :param v: Rotation axis.
2175 """
2176 # get axes as arrays (for convenience)
2177 u = np.asarray(u)
2178 v = np.asarray(v)
2179 # get shorter of two possible distances (using v or -v)
2180 dv1 = u - v
2181 dv2 = u + v
2182 d1 = np.dot(dv1, dv1)
2183 d2 = np.dot(dv2, dv2)
2184 return np.sqrt(min(d1, d2))
2185
2186def _GetClosestChain(ent, ref_chain, chains):
2187 """
2188 :return: Chain closest to *ref_chain* based on center of atoms distance.
2189 :rtype: :class:`str`
2190 :param ent: See :func:`_GetSymmetrySubgroups`
2191 :param ref_chain: We look for chains closest to this one
2192 :type ref_chain: :class:`str`
2193 :param chains: We only consider these chains
2194 :type chains: :class:`list` of :class:`str`
2195 """
2196 contacts = []
2197 ref_center = ent.FindChain(ref_chain).center_of_atoms
2198 for ch in chains:
2199 center = ent.FindChain(ch).center_of_atoms
2200 contacts.append((geom.Distance(ref_center, center), ch))
2201 closest_chain = min(contacts)[1]
2202 return closest_chain
2203
2204def _GetClosestChainInterface(ent, ref_chain, chains):
2205 """
2206 :return: Chain with biggest interface (within 10 A) to *ref_chain*.
2207 :rtype: :class:`str`
2208 :param ent: See :func:`_GetSymmetrySubgroups`
2209 :param ref_chain: We look for chains closest to this one
2210 :type ref_chain: :class:`str`
2211 :param chains: We only consider these chains
2212 :type chains: :class:`list` of :class:`str`
2213 """
2214 # NOTE: this is computed on a subset of the full biounit and might be
2215 # inaccurate. Also it could be extracted from QSscoreEntity.contacts.
2216 closest = []
2217 for ch in chains:
2218 iface_view = ent.Select('cname="%s" and 10 <> [cname="%s"]' \
2219 % (ref_chain, ch))
2220 nr_res = iface_view.residue_count
2221 closest.append((nr_res, ch))
2222 closest_chain = max(closest)[1]
2223 return closest_chain
2224
2225def _ValidChainGroup(group, ent):
2226 """
2227 :return: True, if *group* has unique chain names and as many chains as *ent*
2228 :rtype: :class:`bool`
2229 :param group: Symmetry groups to check
2230 :type group: Same as :attr:`QSscorer.symm_1`
2231 :param ent: See :func:`_GetSymmetrySubgroups`
2232 """
2233 all_chain = [item for sublist in group for item in sublist]
2234 if len(all_chain) == len(set(all_chain)) and\
2235 len(all_chain) == ent.chain_count:
2236 return True
2237 return False
2238
2239def _LimitChemMapping(chem_mapping, limit_1, limit_2):
2240 """
2241 :return: Chem. mapping containing only chains in *limit_1* and *limit_2*
2242 :rtype: Same as :attr:`QSscorer.chem_mapping`
2243 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
2244 :param limit_1: Limits chain names in chem_mapping.keys()
2245 :type limit_1: List/tuple of strings
2246 :param limit_2: Limits chain names in chem_mapping.values()
2247 :type limit_2: List/tuple of strings
2248 """
2249 # use set intersection to keep only mapping for chains in limit_X
2250 limit_1_set = set(limit_1)
2251 limit_2_set = set(limit_2)
2252 asu_chem_mapping = dict()
2253 for key, value in chem_mapping.items():
2254 new_key = tuple(set(key) & limit_1_set)
2255 if new_key:
2256 asu_chem_mapping[new_key] = tuple(set(value) & limit_2_set)
2257 return asu_chem_mapping
2258
2259
2260def _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping):
2261 """
2262 :return: Dictionary of number of mappings and superpositions to be performed.
2263 Returned as *result[X][Y] = number* with X = "intra" or "inter" and
2264 Y = "mappings" or "superpositions". The idea is that for each
2265 pairwise superposition we check all possible mappings.
2266 We can check the combinations within (intra) a symmetry group and
2267 once established, we check the combinations between different (inter)
2268 symmetry groups.
2269 :param symm_1: See :attr:`QSscorer.symm_1`
2270 :param symm_2: See :attr:`QSscorer.symm_2`
2271 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
2272 """
2273 # setup
2274 c = {}
2275 c['intra'] = {}
2276 c['inter'] = {}
2277 c['intra']['mappings'] = 0
2278 c['intra']['superpositions'] = 0
2279 c['inter']['mappings'] = 0
2280 c['inter']['superpositions'] = 0
2281 # intra ASU mappings within reference symmetry groups
2282 ref_symm_1 = symm_1[0]
2283 ref_symm_2 = symm_2[0]
2284 asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
2285 for g1, g2 in asu_chem_mapping.items():
2286 chain_superpositions = len(g1) * len(g2)
2287 c['intra']['superpositions'] += chain_superpositions
2288 map_per_sup = _PermutationOrCombinations(g1[0], g2[0], asu_chem_mapping)
2289 c['intra']['mappings'] += chain_superpositions * map_per_sup
2290 if len(symm_1) == 1 or len(symm_2) == 1:
2291 return c
2292 # inter ASU mappings
2293 asu_superposition = min(len(symm_1), len(symm_2))
2294 c['inter']['superpositions'] = asu_superposition
2295 asu = {tuple(symm_1): tuple(symm_2)}
2296 map_per_sup = _PermutationOrCombinations(ref_symm_1, ref_symm_2, asu)
2297 c['inter']['mappings'] = asu_superposition * map_per_sup
2298 return c
2299
2300def _PermutationOrCombinations(sup1, sup2, chem_mapping):
2301 """Should match len(_ListPossibleMappings(sup1, sup2, chem_mapping))."""
2302 # remove superposed elements, put smallest complex as key
2303 chem_map = {}
2304 for a,b in chem_mapping.items():
2305 new_a = tuple([x for x in a if x != sup1])
2306 new_b = tuple([x for x in b if x != sup2])
2307 chem_map[new_a] = new_b
2308 mapp_nr = 1.0
2309 for a, b in chem_map.items():
2310 if len(a) == len(b):
2311 mapp_nr *= factorial(len(a))
2312 elif len(a) < len(b):
2313 mapp_nr *= binom(len(b), len(a))
2314 elif len(a) > len(b):
2315 mapp_nr *= binom(len(a), len(b))
2316 return int(mapp_nr)
2317
2318def _ListPossibleMappings(sup1, sup2, chem_mapping):
2319 """
2320 Return a flat list of all possible mappings given *chem_mapping* and keeping
2321 mapping of *sup1* and *sup2* fixed. For instance if elements are chain names
2322 this is all the mappings to check for a given superposition.
2323
2324 Elements in first complex are defined by *chem_mapping.keys()* (list of list
2325 of elements) and elements in second complex by *chem_mapping.values()*. If
2326 complexes don't have same number of elements, we map only elements for the one
2327 with less. Also mapping is only between elements of mapped groups according to
2328 *chem_mapping*.
2329
2330 :rtype: :class:`list` of :class:`dict` (key = element in chem_mapping-key,
2331 value = element in chem_mapping-value)
2332 :param sup1: Element for which mapping is fixed.
2333 :type sup1: Like element in chem_mapping-key
2334 :param sup2: Element for which mapping is fixed.
2335 :type sup2: Like element in chem_mapping-value
2336 :param chem_mapping: Defines mapping between groups of elements (e.g. result
2337 of :func:`_LimitChemMapping`).
2338 :type chem_mapping: :class:`dict` with key / value = :class:`tuple`
2339
2340 :raises: :class:`QSscoreError` if reference complex (first one or one with
2341 less elements) has more elements for any given mapped group.
2342 """
2343 # find smallest complex
2344 chain1 = [i for s in list(chem_mapping.keys()) for i in s]
2345 chain2 = [i for s in list(chem_mapping.values()) for i in s]
2346 swap = False
2347 if len(chain1) > len(chain2):
2348 swap = True
2349 # remove superposed elements, put smallest complex as key
2350 chem_map = {}
2351 for a, b in chem_mapping.items():
2352 new_a = tuple([x for x in a if x != sup1])
2353 new_b = tuple([x for x in b if x != sup2])
2354 if swap:
2355 chem_map[new_b] = new_a
2356 else:
2357 chem_map[new_a] = new_b
2358 # generate permutations or combinations of chemically
2359 # equivalent chains
2360 chem_perm = []
2361 chem_ref = []
2362 for a, b in chem_map.items():
2363 if len(a) == len(b):
2364 chem_perm.append(list(itertools.permutations(b)))
2365 chem_ref.append(a)
2366 elif len(a) < len(b):
2367 chem_perm.append(list(itertools.combinations(b, len(a))))
2368 chem_ref.append(a)
2369 else:
2370 raise QSscoreError('Impossible to define reference group: %s' \
2371 % str(chem_map))
2372 # save the list of possible mappings
2373 mappings = []
2374 flat_ref = [i for s in chem_ref for i in s]
2375 for perm in itertools.product(*chem_perm):
2376 flat_perm = [i for s in perm for i in s]
2377 d = {c1: c2 for c1, c2 in zip(flat_ref, flat_perm)}
2378 if swap:
2379 d = {v: k for k, v in list(d.items())}
2380 d.update({sup1: sup2})
2381 mappings.append(d)
2382 return mappings
2383
2384
2385def _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2, chem_mapping,
2386 sup_thr=4, sup_fract=0.8, find_best=True):
2387 """
2388 Quick check if we can superpose two chains and get a mapping for all other
2389 chains using the same transformation. The mapping is defined by sufficient
2390 overlap of the transformed chain of *ent_1* onto another chain in *ent_2*.
2391
2392 :param ent_1: Entity to map to *ent_2* (perfect alignment assumed between
2393 chains of same chem. group as in :attr:`QSscorer.ent_to_cm_1`).
2394 Views are ok but only to select full chains!
2395 :param ent_2: Entity to map to (perfect alignment assumed between
2396 chains of same chem. group as in :attr:`QSscorer.ent_to_cm_2`).
2397 Views are ok but only to select full chains!
2398 :param symm_1: Symmetry groups to use. We only superpose chains within
2399 reference symmetry group of *symm_1* and *symm_2*.
2400 See :attr:`QSscorer.symm_1`
2401 :param symm_2: See :attr:`QSscorer.symm_2`
2402 :param chem_mapping: See :attr:`QSscorer.chem_mapping`.
2403 All chains in *ent_1* / *ent_2* must be listed here!
2404 :param sup_thr: Distance around transformed chain in *ent_1* to check for
2405 overlap.
2406 :type sup_thr: :class:`float`
2407 :param sup_fract: Fraction of atoms in chain of *ent_2* that must be
2408 overlapped for overlap to be sufficient.
2409 :type sup_fract: :class:`float`
2410 :param find_best: If True, we look for best mapping according to
2411 :func:`_GetMappedRMSD`. Otherwise, we return first suitable
2412 mapping.
2413 :type find_best: :class:`bool`
2414
2415 :return: Mapping from *ent_1* to *ent_2* or None if none found. Mapping, if
2416 found, is as in :attr:`QSscorer.chain_mapping`.
2417 :rtype: :class:`dict` (:class:`str` / :class:`str`)
2418 """
2419 # limit chem mapping to reference symmetry group
2420 ref_symm_1 = symm_1[0]
2421 ref_symm_2 = symm_2[0]
2422 asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
2423 # for each chemically identical group
2424 rmsd_mappings = []
2425 for g1, g2 in asu_chem_mapping.items():
2426 # try to superpose all possible pairs
2427 # -> note that some chain-chain combinations may work better than others
2428 # to superpose the full oligomer (e.g. if some chains are open/closed)
2429 for c1, c2 in itertools.product(g1, g2):
2430 # get superposition transformation
2431 chain_1 = ent_1.Select('cname="%s"' % c1)
2432 chain_2 = ent_2.Select('cname="%s"' % c2)
2433 res = mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
2434 # look for overlaps
2435 mapping = _GetSuperpositionMapping(ent_1, ent_2, chem_mapping,
2436 res.transformation, sup_thr,
2437 sup_fract)
2438 if not mapping:
2439 continue
2440 # early abort if we only want the first one
2441 if not find_best:
2442 return mapping
2443 else:
2444 # get RMSD for sorting
2445 rmsd = _GetMappedRMSD(ent_1, ent_2, mapping, res.transformation)
2446 rmsd_mappings.append((rmsd, mapping))
2447 # return best mapping
2448 if rmsd_mappings:
2449 return min(rmsd_mappings, key=lambda x: x[0])[1]
2450 else:
2451 return None
2452
2453def _GetSuperpositionMapping(ent_1, ent_2, chem_mapping, transformation,
2454 sup_thr, sup_fract):
2455 """
2456 :return: Dict with chain mapping from *ent_1* to *ent_2* or None if failed
2457 (see :func:`_CheckClosedSymmetry`).
2458 :param ent_1: See :func:`_CheckClosedSymmetry`
2459 :param ent_2: See :func:`_CheckClosedSymmetry`
2460 :param chem_mapping: See :func:`_CheckClosedSymmetry`
2461 :param transformation: Superposition transformation to be applied to *ent_1*.
2462 :param sup_thr: See :func:`_CheckClosedSymmetry`
2463 :param sup_fract: See :func:`_CheckClosedSymmetry`
2464 """
2465 # NOTE: this seems to be the comput. most expensive part in QS scoring
2466 # -> it could be moved to C++ for speed-up
2467 # -> the next expensive bits are ClustalW and GetContacts btw...
2468
2469 # swap if needed (ent_1 must be shorter or same)
2470 if ent_1.chain_count > ent_2.chain_count:
2471 swap = True
2472 ent_1, ent_2 = ent_2, ent_1
2473 transformation = geom.Invert(transformation)
2474 chem_pairs = list(zip(list(chem_mapping.values()), list(chem_mapping.keys())))
2475 else:
2476 swap = False
2477 chem_pairs = iter(chem_mapping.items())
2478 # sanity check
2479 if ent_1.chain_count == 0:
2480 return None
2481 # extract valid partners for each chain in ent_1
2482 chem_partners = dict()
2483 for cg1, cg2 in chem_pairs:
2484 for ch in cg1:
2485 chem_partners[ch] = set(cg2)
2486 # get mapping for each chain in ent_1
2487 mapping = dict()
2488 mapped_chains = set()
2489 for ch_1 in ent_1.chains:
2490 # get all neighbors split by chain (NOTE: this muight be moved to C++)
2491 ch_atoms = {ch_2.name: set() for ch_2 in ent_2.chains}
2492 for a_1 in ch_1.handle.atoms:
2493 transformed_pos = geom.Vec3(transformation * geom.Vec4(a_1.pos))
2494 a_within = ent_2.FindWithin(transformed_pos, sup_thr)
2495 for a_2 in a_within:
2496 ch_atoms[a_2.chain.name].add(a_2.hash_code)
2497 # get one with most atoms in overlap
2498 max_num, max_name = max((len(atoms), name)
2499 for name, atoms in ch_atoms.items())
2500 # early abort if overlap fraction not good enough
2501 ch_2 = ent_2.FindChain(max_name)
2502 if max_num == 0 or max_num / float(ch_2.handle.atom_count) < sup_fract:
2503 return None
2504 # early abort if mapped to chain of different chem. group
2505 ch_1_name = ch_1.name
2506 if ch_1_name not in chem_partners:
2507 raise RuntimeError("Chem. mapping doesn't contain all chains!")
2508 if max_name not in chem_partners[ch_1_name]:
2509 return None
2510 # early abort if multiple ones mapped to same chain
2511 if max_name in mapped_chains:
2512 return None
2513 # set mapping
2514 mapping[ch_1_name] = max_name
2515 mapped_chains.add(max_name)
2516 # unswap if needed and return
2517 if swap:
2518 mapping = {v: k for k, v in mapping.items()}
2519 return mapping
2520
2521def _GetMappedRMSD(ent_1, ent_2, chain_mapping, transformation):
2522 """
2523 :return: RMSD between complexes considering chain mapping.
2524 :param ent_1: Entity mapped to *ent_2* (perfect alignment assumed between
2525 mapped chains as in :attr:`QSscorer.ent_to_cm_1`).
2526 :param ent_2: Entity which was mapped to (perfect alignment assumed between
2527 mapped chains as in :attr:`QSscorer.ent_to_cm_2`).
2528 :param chain_mapping: See :attr:`QSscorer.chain_mapping`
2529 :param transformation: Superposition transformation to be applied to *ent_1*.
2530 """
2531 # collect RMSDs and atom counts chain by chain and combine afterwards
2532 rmsds = []
2533 atoms = []
2534 for c1, c2 in chain_mapping.items():
2535 # get views and atom counts
2536 chain_1 = ent_1.Select('cname="%s"' % c1)
2537 chain_2 = ent_2.Select('cname="%s"' % c2)
2538 atom_count = chain_1.atom_count
2539 if atom_count != chain_2.atom_count:
2540 raise RuntimeError('Chains in _GetMappedRMSD must be perfectly aligned!')
2541 # get RMSD
2542 rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
2543 # update lists
2544 rmsds.append(rmsd)
2545 atoms.append(float(atom_count))
2546 # combine (reminder: RMSD = sqrt(sum(atom_distance^2)/atom_count))
2547 rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
2548 return rmsd
2549
2551 """Helper object for repetitive RMSD calculations.
2552 Meant to speed up :func:`_GetChainMapping` but could also be used to replace
2553 :func:`_GetMappedRMSD` in :func:`_CheckClosedSymmetry`.
2554
2555 :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
2556 :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
2557 """
2558 def __init__(self, ent_1, ent_2):
2559 # initialize caches and keep entities
2560 self.ent_1 = ent_1
2561 self.ent_2 = ent_2
2562 self._chain_views_1 = dict()
2563 self._chain_views_2 = dict()
2564 self._pair_rmsd = dict() # key = (c1,c2), value = (atom_count,rmsd)
2565
2566 def GetChainView1(self, cname):
2567 """Get cached view on chain *cname* for :attr:`ent_1`."""
2568 if cname not in self._chain_views_1:
2569 self._chain_views_1[cname] = self.ent_1.Select('cname="%s"' % cname)
2570 return self._chain_views_1[cname]
2571
2572 def GetChainView2(self, cname):
2573 """Get cached view on chain *cname* for :attr:`ent_2`."""
2574 if cname not in self._chain_views_2:
2575 self._chain_views_2[cname] = self.ent_2.Select('cname="%s"' % cname)
2576 return self._chain_views_2[cname]
2577
2578 def GetSuperposition(self, c1, c2):
2579 """Get superposition result (no change in entities!) for *c1* to *c2*.
2580 This invalidates cached RMSD results used in :func:`GetMappedRMSD`.
2581
2582 :param c1: chain name for :attr:`ent_1`.
2583 :param c2: chain name for :attr:`ent_2`.
2584 """
2585 # invalidate _pair_rmsd
2586 self._pair_rmsd = dict()
2587 # superpose
2588 chain_1 = self.GetChainView1(c1)
2589 chain_2 = self.GetChainView2(c2)
2590 if chain_1.atom_count != chain_2.atom_count:
2591 raise RuntimeError('Chains in GetSuperposition not perfectly aligned!')
2592 return mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
2593
2594 def GetMappedRMSD(self, chain_mapping, transformation):
2595 """
2596 :return: RMSD between complexes considering chain mapping.
2597 :param chain_mapping: See :attr:`QSscorer.chain_mapping`.
2598 :param transformation: Superposition transformation (e.g. res.transformation
2599 for res = :func:`GetSuperposition`).
2600 """
2601 # collect RMSDs and atom counts chain by chain and combine afterwards
2602 rmsds = []
2603 atoms = []
2604 for c1, c2 in chain_mapping.items():
2605 # cached?
2606 if (c1, c2) in self._pair_rmsd:
2607 atom_count, rmsd = self._pair_rmsd[(c1, c2)]
2608 else:
2609 # get views and atom counts
2610 chain_1 = self.GetChainView1(c1)
2611 chain_2 = self.GetChainView2(c2)
2612 atom_count = chain_1.atom_count
2613 if atom_count != chain_2.atom_count:
2614 raise RuntimeError('Chains in GetMappedRMSD not perfectly aligned!')
2615 # get RMSD
2616 rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
2617 self._pair_rmsd[(c1, c2)] = (atom_count, rmsd)
2618 # update lists
2619 rmsds.append(rmsd)
2620 atoms.append(float(atom_count))
2621 # combine (reminder: RMSD = sqrt(sum(atom_distance^2)/atom_count))
2622 rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
2623 return rmsd
2624
2625
2626def _CleanUserSymmetry(symm, ent):
2627 """Clean-up user provided symmetry.
2628
2629 :param symm: Symmetry group as in :attr:`QSscorer.symm_1`
2630 :param ent: Entity from which to extract chain names
2631 :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
2632 :return: New symmetry group limited to chains in sub-structure *ent*
2633 (see :attr:`QSscorer.symm_1`). Empty list if invalid symmetry.
2634 """
2635 # restrict symm to only contain chains in ent
2636 chain_names = set([ch.name for ch in ent.chains])
2637 new_symm = list()
2638 for symm_group in symm:
2639 new_group = tuple(ch for ch in symm_group if ch in chain_names)
2640 if new_group:
2641 new_symm.append(new_group)
2642 # report it
2643 if new_symm != symm:
2644 LogInfo("Cleaned user symmetry for %s: %s" % (ent.GetName(), new_symm))
2645 # do all groups have same length?
2646 lengths = [len(symm_group) for symm_group in new_symm]
2647 if lengths[1:] != lengths[:-1]:
2648 LogWarning('User symmetry group of different sizes for %s. Ignoring it!' \
2649 % (ent.GetName()))
2650 return []
2651 # do we cover all chains?
2652 s_chain_names = set([ch for symm_group in new_symm for ch in symm_group])
2653 if len(s_chain_names) != sum(lengths):
2654 LogWarning('User symmetry group for %s has duplicate chains. Ignoring it!' \
2655 % (ent.GetName()))
2656 return []
2657 if s_chain_names != chain_names:
2658 LogWarning('User symmetry group for %s is missing chains. Ignoring it!' \
2659 % (ent.GetName()))
2660 return []
2661 # ok all good
2662 return new_symm
2663
2664def _AreValidSymmetries(symm_1, symm_2):
2665 """Check symmetry pair for major problems.
2666
2667 :return: False if any of the two is empty or if they're compatible in size.
2668 :rtype: :class:`bool`
2669 """
2670 if not symm_1 or not symm_2:
2671 return False
2672 if len(symm_1) != 1 or len(symm_2) != 1:
2673 if not len(symm_1[0]) == len(symm_2[0]):
2674 LogWarning('Symmetry groups of different sizes. Ignoring them!')
2675 return False
2676 return True
2677
2678def _GetMappedAlignments(ent_1, ent_2, chain_mapping, res_num_alignment):
2679 """
2680 :return: Alignments of 2 structures given chain mapping
2681 (see :attr:`QSscorer.alignments`).
2682 :param ent_1: Entity containing all chains in *chain_mapping.keys()*.
2683 Views to this entity attached to first sequence of each aln.
2684 :param ent_2: Entity containing all chains in *chain_mapping.values()*.
2685 Views to this entity attached to second sequence of each aln.
2686 :param chain_mapping: See :attr:`QSscorer.chain_mapping`
2687 :param res_num_alignment: See :attr:`QSscorer.res_num_alignment`
2688 """
2689 alns = list()
2690 for ch_1_name in sorted(chain_mapping):
2691 # get both sequences incl. attached view
2692 ch_1 = ent_1.FindChain(ch_1_name)
2693 ch_2 = ent_2.FindChain(chain_mapping[ch_1_name])
2694 if res_num_alignment:
2695 max_res_num = max([r.number.GetNum() for r in ch_1.residues] +
2696 [r.number.GetNum() for r in ch_2.residues])
2697 ch1_aln = ["-"] * max_res_num
2698 ch2_aln = ["-"] * max_res_num
2699 for res in ch_1.residues:
2700 ch1_aln[res.number.GetNum() - 1] = res.GetOneLetterCode()
2701 ch1_aln = "".join(ch1_aln)
2702 seq_1 = seq.CreateSequence(ch_1.name, str(ch1_aln))
2703 seq_1.AttachView(ch_1.Select(""))
2704 for res in ch_2.residues:
2705 ch2_aln[res.number.GetNum() - 1] = res.GetOneLetterCode()
2706 ch2_aln = "".join(ch2_aln)
2707 seq_2 = seq.CreateSequence(ch_2.name, str(ch2_aln))
2708 seq_2.AttachView(ch_2.Select(""))
2709 # Create alignment
2710 aln = seq.CreateAlignment()
2711 aln.AddSequence(seq_1)
2712 aln.AddSequence(seq_2)
2713 else:
2714 seq_1 = seq.SequenceFromChain(ch_1.name, ch_1)
2715 seq_2 = seq.SequenceFromChain(ch_2.name, ch_2)
2716 # align them
2717 aln = _AlignAtomSeqs(seq_1, seq_2)
2718 if aln:
2719 alns.append(aln)
2720 return alns
2721
2723 """
2724 :return: Mapping of shared residues in *alns* (with views attached)
2725 (see :attr:`QSscorer.mapped_residues`).
2726 :param alns: See :attr:`QSscorer.alignments`
2727 """
2728 mapped_residues = dict()
2729 for aln in alns:
2730 # prepare dict for c1
2731 c1 = aln.GetSequence(0).name
2732 mapped_residues[c1] = dict()
2733 # get shared residues
2734 v1, v2 = seq.ViewsFromAlignment(aln)
2735 for res_1, res_2 in zip(v1.residues, v2.residues):
2736 r1 = res_1.number.num
2737 r2 = res_2.number.num
2738 mapped_residues[c1][r1] = r2
2739
2740 return mapped_residues
2741
2742def _GetExtraWeights(contacts, done, mapped_residues):
2743 """Return sum of extra weights for contacts of chains in set and not in done.
2744 :return: Tuple (weight_extra_mapped, weight_extra_all).
2745 weight_extra_mapped only sums if both cX,rX in mapped_residues
2746 weight_extra_all sums all
2747 :param contacts: See :func:`GetContacts` for first entity
2748 :param done: List of (c1, c2, r1, r2) tuples to ignore
2749 :param mapped_residues: See :func:`_GetMappedResidues`
2750 """
2751 weight_extra_mapped = 0
2752 weight_extra_non_mapped = 0
2753 for c1 in contacts:
2754 for c2 in contacts[c1]:
2755 for r1 in contacts[c1][c2]:
2756 for r2 in contacts[c1][c2][r1]:
2757 if (c1, c2, r1, r2) not in done:
2758 weight = _weight(contacts[c1][c2][r1][r2])
2759 if c1 in mapped_residues and r1 in mapped_residues[c1] \
2760 and c2 in mapped_residues and r2 in mapped_residues[c2]:
2761 weight_extra_mapped += weight
2762 else:
2763 weight_extra_non_mapped += weight
2764 return weight_extra_mapped, weight_extra_mapped + weight_extra_non_mapped
2765
2766def _GetScores(contacts_1, contacts_2, mapped_residues, chain_mapping):
2767 """Get QS scores (see :class:`QSscorer`).
2768
2769 Note that if some chains are not to be considered at all, they must be removed
2770 from *contacts_1* / *contacts_2* prior to calling this.
2771
2772 :param contacts_1: See :func:`GetContacts` for first entity
2773 :param contacts_2: See :func:`GetContacts` for second entity
2774 :param mapped_residues: See :func:`_GetMappedResidues`
2775 :param chain_mapping: Maps any chain name in *mapped_residues* to chain name
2776 for *contacts_2*.
2777 :type chain_mapping: :class:`dict` (:class:`str` / :class:`str`)
2778 :return: Tuple (QS_best, QS_global) (see :attr:`QSscorer.best_score` and
2779 see :attr:`QSscorer.global_score`)
2780 """
2781 # keep track of considered contacts as set of (c1,c2,r1,r2) tuples
2782 done_1 = set()
2783 done_2 = set()
2784 weighted_scores = 0
2785 weight_sum = 0
2786 # naming cXY, rXY: X = 1/2 for contact, Y = 1/2/T for mapping (T = tmp)
2787 # -> d1 = contacts_1[c11][c21][r11][r21], d2 = contacts_2[c12][c22][r12][r22]
2788 for c11 in contacts_1:
2789 # map to other one
2790 if c11 not in mapped_residues: continue
2791 c1T = chain_mapping[c11]
2792 # second chain
2793 for c21 in contacts_1[c11]:
2794 # map to other one
2795 if c21 not in mapped_residues: continue
2796 c2T = chain_mapping[c21]
2797 # flip if needed
2798 flipped_chains = (c1T > c2T)
2799 if flipped_chains:
2800 c12, c22 = c2T, c1T
2801 else:
2802 c12, c22 = c1T, c2T
2803 # skip early if no contacts there
2804 if c12 not in contacts_2 or c22 not in contacts_2[c12]: continue
2805 # loop over res.num. in c11
2806 for r11 in contacts_1[c11][c21]:
2807 # map to other one and skip if no contacts there
2808 if r11 not in mapped_residues[c11]: continue
2809 r1T = mapped_residues[c11][r11]
2810 # loop over res.num. in c21
2811 for r21 in contacts_1[c11][c21][r11]:
2812 # map to other one and skip if no contacts there
2813 if r21 not in mapped_residues[c21]: continue
2814 r2T = mapped_residues[c21][r21]
2815 # flip if needed
2816 if flipped_chains:
2817 r12, r22 = r2T, r1T
2818 else:
2819 r12, r22 = r1T, r2T
2820 # skip early if no contacts there
2821 if r12 not in contacts_2[c12][c22]: continue
2822 if r22 not in contacts_2[c12][c22][r12]: continue
2823 # ok now we can finally do the scoring
2824 d1 = contacts_1[c11][c21][r11][r21]
2825 d2 = contacts_2[c12][c22][r12][r22]
2826 weight = _weight(min(d1, d2))
2827 weighted_scores += weight * (1 - abs(d1 - d2) / 12)
2828 weight_sum += weight
2829 # keep track of done ones
2830 done_1.add((c11, c21, r11, r21))
2831 done_2.add((c12, c22, r12, r22))
2832
2833 LogVerbose("Shared contacts: %d" % len(done_1))
2834
2835 # add the extra weights
2836 weights_extra_1 = _GetExtraWeights(contacts_1, done_1, mapped_residues)
2837 mapped_residues_2 = dict()
2838 for c1 in mapped_residues:
2839 c2 = chain_mapping[c1]
2840 mapped_residues_2[c2] = set()
2841 for r1 in mapped_residues[c1]:
2842 mapped_residues_2[c2].add(mapped_residues[c1][r1])
2843 weights_extra_2 = _GetExtraWeights(contacts_2, done_2, mapped_residues_2)
2844 weight_extra_mapped = weights_extra_1[0] + weights_extra_2[0]
2845 weight_extra_all = weights_extra_1[1] + weights_extra_2[1]
2846
2847 # get scores
2848 denom_best = weight_sum + weight_extra_mapped
2849 denom_all = weight_sum + weight_extra_all
2850 if denom_best == 0:
2851 QS_best = 0
2852 else:
2853 QS_best = weighted_scores / denom_best
2854 if denom_all == 0:
2855 QS_global = 0
2856 else:
2857 QS_global = weighted_scores / denom_all
2858 return QS_best, QS_global
2859
2860def _weight(dist):
2861 """
2862 This weight expresses the probability of two residues to interact given the CB
2863 distance (from Xu et al. http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2573399/)
2864 """
2865 if dist <= 5.0:
2866 return 1
2867 x = (dist-5.0)/4.28;
2868 return np.exp(-2*x*x)
2869
2870
2872 """
2873 :return: Superposition result based on shared CA atoms in *alns*
2874 (with views attached) (see :attr:`QSscorer.superposition`).
2875 :param alns: See :attr:`QSscorer.alignments`
2876 """
2877 # check input
2878 if not alns:
2879 raise QSscoreError('No successful alignments for superposition!')
2880 # prepare views
2881 view_1 = alns[0].GetSequence(0).attached_view.CreateEmptyView()
2882 view_2 = alns[0].GetSequence(1).attached_view.CreateEmptyView()
2883 # collect CA from alignments in proper order
2884 for aln in alns:
2885 for col in aln:
2886 res_1 = col.GetResidue(0)
2887 res_2 = col.GetResidue(1)
2888 if res_1.IsValid() and res_2.IsValid():
2889 ca_1 = res_1.FindAtom('CA')
2890 ca_2 = res_2.FindAtom('CA')
2891 if ca_1.IsValid() and ca_2.IsValid():
2892 view_1.AddAtom(ca_1)
2893 view_2.AddAtom(ca_2)
2894 # superpose them without chainging entities
2895 res = mol.alg.SuperposeSVD(view_1, view_2, apply_transform=False)
2896 return res
2897
2898
2899def _AddResidue(edi, res, rnum, chain, calpha_only):
2900 """
2901 Add residue *res* with res. num. *run* to given *chain* using editor *edi*.
2902 Either all atoms added or (if *calpha_only*) only CA.
2903 """
2904 if calpha_only:
2905 ca_atom = res.FindAtom('CA')
2906 if ca_atom.IsValid():
2907 new_res = edi.AppendResidue(chain, res.name, rnum)
2908 edi.InsertAtom(new_res, ca_atom.name, ca_atom.pos)
2909 else:
2910 new_res = edi.AppendResidue(chain, res.name, rnum)
2911 for atom in res.atoms:
2912 edi.InsertAtom(new_res, atom.name, atom.pos)
2913
2914def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only, penalize_extra_chains):
2915 """
2916 Create two new entities (based on the alignments attached views) where all
2917 residues have same numbering (when they're aligned) and they are all pushed to
2918 a single chain X. Also append extra chains contained in *ent_1* and *ent_2*
2919 but not contained in *alns*.
2920
2921 Used for :attr:`QSscorer.lddt_ref` and :attr:`QSscorer.lddt_mdl`
2922
2923 :param alns: List of alignments with attached views (first sequence: *ent_1*,
2924 second: *ent_2*). Residue number in single chain is column index
2925 of current alignment + sum of lengths of all previous alignments
2926 (order of alignments as in input list).
2927 :type alns: See :attr:`QSscorer.alignments`
2928 :param ent_1: First entity to process.
2929 :type ent_1: :class:`~ost.mol.EntityHandle`
2930 :param ent_2: Second entity to process.
2931 :type ent_2: :class:`~ost.mol.EntityHandle`
2932 :param calpha_only: If True, we only include CA atoms instead of all.
2933 :type calpha_only: :class:`bool`
2934 :param penalize_extra_chains: If True, extra chains are added to model and
2935 reference. Otherwise, only mapped ones.
2936 :type penalize_extra_chains: :class:`bool`
2937
2938 :return: Tuple of two single chain entities (from *ent_1* and from *ent_2*)
2939 :rtype: :class:`tuple` of :class:`~ost.mol.EntityHandle`
2940 """
2941 # first new entity
2942 ent_ren_1 = mol.CreateEntity()
2943 ed_1 = ent_ren_1.EditXCS()
2944 new_chain_1 = ed_1.InsertChain('X')
2945 # second one
2946 ent_ren_2 = mol.CreateEntity()
2947 ed_2 = ent_ren_2.EditXCS()
2948 new_chain_2 = ed_2.InsertChain('X')
2949 # the alignment already contains sorted chains
2950 rnum = 0
2951 chain_done_1 = set()
2952 chain_done_2 = set()
2953 for aln in alns:
2954 chain_done_1.add(aln.GetSequence(0).name)
2955 chain_done_2.add(aln.GetSequence(1).name)
2956 for col in aln:
2957 rnum += 1
2958 # add valid residues to single chain entities
2959 res_1 = col.GetResidue(0)
2960 if res_1.IsValid():
2961 _AddResidue(ed_1, res_1, rnum, new_chain_1, calpha_only)
2962 res_2 = col.GetResidue(1)
2963 if res_2.IsValid():
2964 _AddResidue(ed_2, res_2, rnum, new_chain_2, calpha_only)
2965 # extra chains?
2966 if penalize_extra_chains:
2967 for chain in ent_1.chains:
2968 if chain.name in chain_done_1:
2969 continue
2970 for res in chain.residues:
2971 rnum += 1
2972 _AddResidue(ed_1, res, rnum, new_chain_1, calpha_only)
2973 for chain in ent_2.chains:
2974 if chain.name in chain_done_2:
2975 continue
2976 for res in chain.residues:
2977 rnum += 1
2978 _AddResidue(ed_2, res, rnum, new_chain_2, calpha_only)
2979 # get entity names
2980 ent_ren_1.SetName(aln.GetSequence(0).GetAttachedView().GetName())
2981 ent_ren_2.SetName(aln.GetSequence(1).GetAttachedView().GetName())
2982 # connect atoms
2983 if not conop.GetDefaultLib():
2984 raise RuntimeError("QSscore computation requires a compound library!")
2985 pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
2986 pr.Process(ent_ren_1)
2987 ed_1.UpdateICS()
2988 pr.Process(ent_ren_2)
2989 ed_2.UpdateICS()
2990 return ent_ren_1, ent_ren_2
2991
2992
2993# specify public interface
2994__all__ = ('QSscoreError', 'QSscorer', 'QSscoreEntity', 'FilterContacts',
2995 'GetContacts', 'OligoLDDTScorer', 'MappedLDDTScorer')
Three dimensional vector class, using Real precision.
Definition vec3.hh:48
GetMappedRMSD(self, chain_mapping, transformation)
__init__(self, alignment, calpha_only, settings)
_InitScorer(self)
Class internal helpers (anything that doesnt easily work without this class)
__init__(self, ref, mdl, alignments, calpha_only, settings, penalize_extra_chains=False, chem_mapping=None)
_PrepareOligoEntities(self)
Class internal helpers.
_GetSuperposeData(self, c1, c2)
Class internal helpers (anything that doesnt easily work without this class)
Definition qsscoring.py:853
chain_mapping(self, chain_mapping)
Definition qsscoring.py:405
SetSymmetries(self, symm_1, symm_2)
Definition qsscoring.py:323
chem_mapping(self, chem_mapping)
Definition qsscoring.py:237
_ComputeAlignedEntities(self)
Class internal helpers (anything that doesnt easily work without this class)
Definition qsscoring.py:579
mapped_residues(self, mapped_residues)
Definition qsscoring.py:483
__init__(self, ent_1, ent_2, res_num_alignment=False)
Definition qsscoring.py:170
clustalw_bin(self, clustalw_bin)
Definition qsscoring.py:555
GetOligoLDDTScorer(self, settings, penalize_extra_chains=True)
Definition qsscoring.py:558
ent_to_cm_1(self, ent_to_cm_1)
Definition qsscoring.py:268
ent_to_cm_2(self, ent_to_cm_2)
Definition qsscoring.py:281
Mat2 DLLEXPORT_OST_GEOM Invert(const Mat2 &m)
Matrix inversion.
Real DLLEXPORT_OST_GEOM Distance(const Line2 &l, const Vec2 &v)
_FindSymmetry(qs_ent_1, qs_ent_2, ent_to_cm_1, ent_to_cm_2, chem_mapping)
_AddResidue(edi, res, rnum, chain, calpha_only)
_GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain, clustalw_bin)
_GetChemGroupsMapping(qs_ent_1, qs_ent_2)
_GetMappedAlignments(ent_1, ent_2, chain_mapping, res_num_alignment)
_ClusterData(data, thr, metric)
_GetSuperpositionMapping(ent_1, ent_2, chem_mapping, transformation, sup_thr, sup_fract)
_GetSymmetrySubgroups(qs_ent, ent, chem_groups)
_GetMappedRMSD(ent_1, ent_2, chain_mapping, transformation)
_PermutationOrCombinations(sup1, sup2, chem_mapping)
_CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2, chem_mapping, sup_thr=4, sup_fract=0.8, find_best=True)
_LimitChemMapping(chem_mapping, limit_1, limit_2)
FilterContacts(contacts, chain_names)
Definition qsscoring.py:882
_CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
_AreValidSymmetries(symm_1, symm_2)
_SelectFew(l, max_elements)
_GetClosestChainInterface(ent, ref_chain, chains)
_GetScores(contacts_1, contacts_2, mapped_residues, chain_mapping)
_ValidChainGroup(group, ent)
_CleanUserSymmetry(symm, ent)
_ListPossibleMappings(sup1, sup2, chem_mapping)
_GetClosestChain(ent, ref_chain, chains)
_GetCyclicSubgroups(ent, chem_groups, axis, axis_thr)
GetContacts(entity, calpha_only, dist_thr=12.0)
Definition qsscoring.py:905
_GetChemGroups(qs_ent, seqid_thr=95.)
_GetExtraWeights(contacts, done, mapped_residues)
_GetChainMapping(ent_1, ent_2, symm_1, symm_2, chem_mapping, max_mappings_extensive)
_AlignAtomSeqs(seq_1, seq_2)
_MergeAlignedChains(alns, ent_1, ent_2, calpha_only, penalize_extra_chains)
_FixSelectChainNames(ch_names)
_GetDihedralSubgroups(ent, chem_groups, angles, angle_thr)