OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
qsscoring.py
Go to the documentation of this file.
1 """
2 Scoring of quaternary structures (QS). The QS scoring is according to the paper
3 by `Bertoni et al. <https://dx.doi.org/10.1038/s41598-017-09654-8>`_.
4 
5 .. note ::
6 
7  Requirements for use:
8 
9  - A default :class:`compound library <ost.conop.CompoundLib>` must be defined
10  and accessible via :func:`~ost.conop.GetDefaultLib`. This is set by default
11  when executing scripts with ``ost``. Otherwise, you must set this with
12  :func:`~ost.conop.SetDefaultLib`.
13  - ClustalW must be installed (unless you provide chain mappings)
14  - Python modules `numpy` and `scipy` must be installed and available
15  (e.g. use ``pip install scipy numpy``)
16 """
17 # Original authors: Gerardo Tauriello, Martino Bertoni
18 
19 from ost import mol, geom, conop, seq, settings, PushVerbosityLevel
20 from ost import LogError, LogWarning, LogScript, LogInfo, LogVerbose, LogDebug
21 from ost.bindings.clustalw import ClustalW
22 from ost.mol.alg import lDDTScorer
23 from ost.seq.alg.renumber import Renumber
24 import numpy as np
25 from scipy.misc import factorial
26 from scipy.special import binom
27 from scipy.cluster.hierarchy import fclusterdata
28 import itertools
29 
30 ###############################################################################
31 # QS scoring
32 ###############################################################################
33 
34 class QSscoreError(Exception):
35  """Exception to be raised for "acceptable" exceptions in QS scoring.
36 
37  Those are cases we might want to capture for default behavior.
38  """
39  pass
40 
41 class QSscorer:
42  """Object to compute QS scores.
43 
44  Simple usage without any precomputed contacts, symmetries and mappings:
45 
46  .. code-block:: python
47 
48  import ost
49  from ost.mol.alg import qsscoring
50 
51  # load two biounits to compare
52  ent_full = ost.io.LoadPDB('3ia3', remote=True)
53  ent_1 = ent_full.Select('cname=A,D')
54  ent_2 = ent_full.Select('cname=B,C')
55  # get score
56  ost.PushVerbosityLevel(3)
57  try:
58  qs_scorer = qsscoring.QSscorer(ent_1, ent_2)
59  ost.LogScript('QSscore:', str(qs_scorer.global_score))
60  ost.LogScript('Chain mapping used:', str(qs_scorer.chain_mapping))
61  # commonly you want the QS global score as output
62  qs_score = qs_scorer.global_score
63  except qsscoring.QSscoreError as ex:
64  # default handling: report failure and set score to 0
65  ost.LogError('QSscore failed:', str(ex))
66  qs_score = 0
67 
68  For maximal performance when computing QS scores of the same entity with many
69  others, it is advisable to construct and reuse :class:`QSscoreEntity` objects.
70 
71  Any known / precomputed information can be filled into the appropriate
72  attribute here (no checks done!). Otherwise most quantities are computed on
73  first access and cached (lazy evaluation). Setters are provided to set values
74  with extra checks (e.g. :func:`SetSymmetries`).
75 
76  All necessary seq. alignments are done by global BLOSUM62-based alignment. A
77  multiple sequence alignment is performed with ClustalW unless
78  :attr:`chain_mapping` is provided manually. You will need to have an
79  executable ``clustalw`` or ``clustalw2`` in your ``PATH`` or you must set
80  :attr:`clustalw_bin` accordingly. Otherwise an exception
81  (:class:`ost.settings.FileNotFound`) is thrown.
82 
83  Formulas for QS scores:
84 
85  ::
86 
87  - QS_best = weighted_scores / (weight_sum + weight_extra_mapped)
88  - QS_global = weighted_scores / (weight_sum + weight_extra_all)
89  -> weighted_scores = sum(w(min(d1,d2)) * (1 - abs(d1-d2)/12)) for shared
90  -> weight_sum = sum(w(min(d1,d2))) for shared
91  -> weight_extra_mapped = sum(w(d)) for all mapped but non-shared
92  -> weight_extra_all = sum(w(d)) for all non-shared
93  -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else
94 
95  In the formulas above:
96 
97  * "d": CA/CB-CA/CB distance of an "inter-chain contact" ("d1", "d2" for
98  "shared" contacts).
99  * "mapped": we could map chains of two structures and align residues in
100  :attr:`alignments`.
101  * "shared": pairs of residues which are "mapped" and have
102  "inter-chain contact" in both structures.
103  * "inter-chain contact": CB-CB pairs (CA for GLY) with distance <= 12 A
104  (fallback to CA-CA if :attr:`calpha_only` is True).
105  * "w(d)": weighting function (prob. of 2 res. to interact given CB distance)
106  from `Xu et al. 2009 <https://dx.doi.org/10.1016%2Fj.jmb.2008.06.002>`_.
107 
108  :param ent_1: First structure to be scored.
109  :type ent_1: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
110  :class:`~ost.mol.EntityView`
111  :param ent_2: Second structure to be scored.
112  :type ent_2: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
113  :class:`~ost.mol.EntityView`
114  :param res_num_alignment: Sets :attr:`res_num_alignment`
115 
116  :raises: :class:`QSscoreError` if input structures are invalid or are monomers
117  or have issues that make it impossible for a QS score to be computed.
118 
119  .. attribute:: qs_ent_1
120 
121  :class:`QSscoreEntity` object for *ent_1* given at construction.
122  If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
123  set it to 'pdb_1' using :func:`~QSscoreEntity.SetName`.
124 
125  .. attribute:: qs_ent_2
126 
127  :class:`QSscoreEntity` object for *ent_2* given at construction.
128  If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
129  set it to 'pdb_2' using :func:`~QSscoreEntity.SetName`.
130 
131  .. attribute:: calpha_only
132 
133  True if any of the two structures is CA-only (after cleanup).
134 
135  :type: :class:`bool`
136 
137  .. attribute:: max_ca_per_chain_for_cm
138 
139  Maximal number of CA atoms to use in each chain to determine chain mappings.
140  Setting this to -1 disables the limit. Limiting it speeds up determination
141  of symmetries and chain mappings. By default it is set to 100.
142 
143  :type: :class:`int`
144 
145  .. attribute:: max_mappings_extensive
146 
147  Maximal number of chain mappings to test for 'extensive'
148  :attr:`chain_mapping_scheme`. The extensive chain mapping search must in the
149  worst case check O(N^2) * O(N!) possible mappings for complexes with N
150  chains. Two octamers without symmetry would require 322560 mappings to be
151  checked. To limit computations, a :class:`QSscoreError` is thrown if we try
152  more than the maximal number of chain mappings.
153  The value must be set before the first use of :attr:`chain_mapping`.
154  By default it is set to 100000.
155 
156  :type: :class:`int`
157 
158  .. attribute:: res_num_alignment
159 
160  Forces each alignment in :attr:`alignments` to be based on residue numbers
161  instead of using a global BLOSUM62-based alignment.
162 
163  :type: :class:`bool`
164  """
165  def __init__(self, ent_1, ent_2, res_num_alignment=False):
166  # generate QSscoreEntity objects?
167  if isinstance(ent_1, QSscoreEntity):
168  self.qs_ent_1 = ent_1
169  else:
170  self.qs_ent_1 = QSscoreEntity(ent_1)
171  if isinstance(ent_2, QSscoreEntity):
172  self.qs_ent_2 = ent_2
173  else:
174  self.qs_ent_2 = QSscoreEntity(ent_2)
175  # check validity of inputs
176  if not self.qs_ent_1.is_valid or not self.qs_ent_2.is_valid:
177  raise QSscoreError("Invalid input in QSscorer!")
178  # set names for structures
179  if self.qs_ent_1.original_name == self.qs_ent_2.original_name:
180  self.qs_ent_1.SetName('pdb_1')
181  self.qs_ent_2.SetName('pdb_2')
182  else:
183  self.qs_ent_1.SetName(self.qs_ent_1.original_name)
184  self.qs_ent_2.SetName(self.qs_ent_2.original_name)
185  # set other public attributes
186  self.res_num_alignment = res_num_alignment
187  self.calpha_only = self.qs_ent_1.calpha_only or self.qs_ent_2.calpha_only
190  # init cached stuff
191  self._chem_mapping = None
192  self._ent_to_cm_1 = None
193  self._ent_to_cm_2 = None
194  self._symm_1 = None
195  self._symm_2 = None
196  self._chain_mapping = None
197  self._chain_mapping_scheme = None
198  self._alignments = None
199  self._mapped_residues = None
200  self._global_score = None
201  self._best_score = None
202  self._superposition = None
203  self._clustalw_bin = None
204 
205  @property
206  def chem_mapping(self):
207  """Inter-complex mapping of chemical groups.
208 
209  Each group (see :attr:`QSscoreEntity.chem_groups`) is mapped according to
210  highest sequence identity. Alignment is between longest sequences in groups.
211 
212  Limitations:
213 
214  - If different numbers of groups, we map only the groups for the complex
215  with less groups (rest considered unmapped and shown as warning)
216  - The mapping is forced: the "best" mapping will be chosen independently of
217  how low the seq. identity may be
218 
219  :getter: Computed on first use (cached)
220  :type: :class:`dict` with key = :class:`tuple` of chain names in
221  :attr:`qs_ent_1` and value = :class:`tuple` of chain names in
222  :attr:`qs_ent_2`.
223 
224  :raises: :class:`QSscoreError` if we end up having no chains for either
225  entity in the mapping (can happen if chains do not have CA atoms).
226  """
227  if self._chem_mapping is None:
228  self._chem_mapping = _GetChemGroupsMapping(self.qs_ent_1, self.qs_ent_2)
229  return self._chem_mapping
230 
231  @property
232  def ent_to_cm_1(self):
233  """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries.
234 
235  Properties:
236 
237  - Includes only residues aligned according to :attr:`chem_mapping`
238  - Includes only 1 CA atom per residue
239  - Has at least 5 and at most :attr:`max_ca_per_chain_for_cm` atoms per chain
240  - All chains of the same chemical group have the same number of atoms
241  (also in :attr:`ent_to_cm_2` according to :attr:`chem_mapping`)
242  - All chains appearing in :attr:`chem_mapping` appear in this entity
243  (so the two can be safely used together)
244 
245  This entity might be transformed (i.e. all positions rotated/translated by
246  same transformation matrix) if this can speed up computations. So do not
247  assume fixed global positions (but relative distances will remain fixed).
248 
249  :getter: Computed on first use (cached)
250  :type: :class:`~ost.mol.EntityHandle`
251 
252  :raises: :class:`QSscoreError` if any chain ends up having less than 5 res.
253  """
254  if self._ent_to_cm_1 is None:
256  return self._ent_to_cm_1
257 
258  @property
259  def ent_to_cm_2(self):
260  """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries
261  (see :attr:`ent_to_cm_1` for details).
262  """
263  if self._ent_to_cm_2 is None:
265  return self._ent_to_cm_2
266 
267  @property
268  def symm_1(self):
269  """Symmetry groups for :attr:`qs_ent_1` used to speed up chain mapping.
270 
271  This is a list of chain-lists where each chain-list can be used reconstruct
272  the others via cyclic C or dihedral D symmetry. The first chain-list is used
273  as a representative symmetry group. For heteromers, the group-members must
274  contain all different seqres in oligomer.
275 
276  Example: symm. groups [(A,B,C), (D,E,F), (G,H,I)] means that there are
277  symmetry transformations to get (D,E,F) and (G,H,I) from (A,B,C).
278 
279  Properties:
280 
281  - All symmetry group tuples have the same length (num. of chains)
282  - All chains in :attr:`ent_to_cm_1` appear (w/o duplicates)
283  - For heteros: symmetry group tuples have all different chem. groups
284  - Trivial symmetry group = one tuple with all chains (used if inconsistent
285  data provided or if no symmetry is found)
286  - Either compatible to :attr:`symm_2` or trivial symmetry groups used.
287  Compatibility requires same lengths of symmetry group tuples and it must
288  be possible to get an overlap (80% of residues covered within 6 A of a
289  (chem. mapped) chain) of all chains in representative symmetry groups by
290  superposing one pair of chains.
291 
292  :getter: Computed on first use (cached)
293  :type: :class:`list` of :class:`tuple` of :class:`str` (chain names)
294  """
295  if self._symm_1 is None:
296  self._ComputeSymmetry()
297  return self._symm_1
298 
299  @property
300  def symm_2(self):
301  """Symmetry groups for :attr:`qs_ent_2` (see :attr:`symm_1` for details)."""
302  if self._symm_2 is None:
303  self._ComputeSymmetry()
304  return self._symm_2
305 
306  def SetSymmetries(self, symm_1, symm_2):
307  """Set user-provided symmetry groups.
308 
309  These groups are restricted to chain names appearing in :attr:`ent_to_cm_1`
310  and :attr:`ent_to_cm_2` respectively. They are only valid if they cover all
311  chains and both *symm_1* and *symm_2* have same lengths of symmetry group
312  tuples. Otherwise trivial symmetry group used (see :attr:`symm_1`).
313 
314  :param symm_1: Value to set for :attr:`symm_1`.
315  :param symm_2: Value to set for :attr:`symm_2`.
316  """
317  # restrict chain names
318  self._symm_1 = _CleanUserSymmetry(symm_1, self.ent_to_cm_1)
319  self._symm_2 = _CleanUserSymmetry(symm_2, self.ent_to_cm_2)
320  # check that we have reasonable symmetries set (fallback: all chains)
321  if not _AreValidSymmetries(self._symm_1, self._symm_2):
322  self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1.chains)]
323  self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2.chains)]
324 
325  @property
326  def chain_mapping(self):
327  """Mapping from :attr:`ent_to_cm_1` to :attr:`ent_to_cm_2`.
328 
329  Properties:
330 
331  - Mapping is between chains of same chem. group (see :attr:`chem_mapping`)
332  - Each chain can appear only once in mapping
333  - All chains of complex with less chains are mapped
334  - Symmetry (:attr:`symm_1`, :attr:`symm_2`) is taken into account
335 
336  Details on algorithms used to find mapping:
337 
338  - We try all pairs of chem. mapped chains within symmetry group and get
339  superpose-transformation for them
340  - First option: check for "sufficient overlap" of other chain-pairs
341 
342  - For each chain-pair defined above: apply superposition to full oligomer
343  and map chains based on structural overlap
344  - Structural overlap = X% of residues in second oligomer covered within Y
345  Angstrom of a (chem. mapped) chain in first oligomer. We successively
346  try (X,Y) = (80,4), (40,6) and (20,8) to be less and less strict in
347  mapping (warning shown for most permissive one).
348  - If multiple possible mappings are found, we choose the one which leads
349  to the lowest multi-chain-RMSD given the superposition
350 
351  - Fallback option: try all mappings to find minimal multi-chain-RMSD
352  (warning shown)
353 
354  - For each chain-pair defined above: apply superposition, try all (!)
355  possible chain mappings (within symmetry group) and keep mapping with
356  lowest multi-chain-RMSD
357  - Repeat procedure above to resolve symmetry. Within the symmetry group we
358  can use the chain mapping computed before and we just need to find which
359  symmetry group in first oligomer maps to which in the second one. We
360  again try all possible combinations...
361  - Limitations:
362 
363  - Trying all possible mappings is a combinatorial nightmare (factorial).
364  We throw an exception if too many combinations (e.g. octomer vs
365  octomer with no usable symmetry)
366  - The mapping is forced: the "best" mapping will be chosen independently
367  of how badly they fit in terms of multi-chain-RMSD
368  - As a result, such a forced mapping can lead to a large range of
369  resulting QS scores. An extreme example was observed between 1on3.1
370  and 3u9r.1, where :attr:`global_score` can range from 0.12 to 0.43
371  for mappings with very similar multi-chain-RMSD.
372 
373  :getter: Computed on first use (cached)
374  :type: :class:`dict` with key / value = :class:`str` (chain names, key
375  for :attr:`ent_to_cm_1`, value for :attr:`ent_to_cm_2`)
376  :raises: :class:`QSscoreError` if there are too many combinations to check
377  to find a chain mapping (see :attr:`max_mappings_extensive`).
378  """
379  if self._chain_mapping is None:
381  _GetChainMapping(self.ent_to_cm_1, self.ent_to_cm_2, self.symm_1,
382  self.symm_2, self.chem_mapping,
384  LogInfo('Mapping found: %s' % str(self._chain_mapping))
385  return self._chain_mapping
386 
387  @property
389  """Mapping scheme used to get :attr:`chain_mapping`.
390 
391  Possible values:
392 
393  - 'strict': 80% overlap needed within 4 Angstrom (overlap based mapping).
394  - 'tolerant': 40% overlap needed within 6 Angstrom (overlap based mapping).
395  - 'permissive': 20% overlap needed within 8 Angstrom (overlap based
396  mapping). It's best if you check mapping manually!
397  - 'extensive': Extensive search used for mapping detection (fallback). This
398  approach has known limitations and may be removed in future versions.
399  Mapping should be checked manually!
400  - 'user': :attr:`chain_mapping` was set by user before first use of this
401  attribute.
402 
403  :getter: Computed with :attr:`chain_mapping` on first use (cached)
404  :type: :class:`str`
405  :raises: :class:`QSscoreError` as in :attr:`chain_mapping`.
406  """
407  if self._chain_mapping_scheme is None:
408  # default: user provided
409  self._chain_mapping_scheme = 'user'
410  # get chain mapping and make sure internal variable is set
411  # -> will not compute and only update _chain_mapping if user provided
412  # -> will compute and overwrite _chain_mapping_scheme else
413  self._chain_mapping = self.chain_mapping
414  return self._chain_mapping_scheme
415 
416  @property
417  def alignments(self):
418  """List of successful sequence alignments using :attr:`chain_mapping`.
419 
420  There will be one alignment for each mapped chain and they are ordered by
421  their chain names in :attr:`qs_ent_1`.
422 
423  The first sequence of each alignment belongs to :attr:`qs_ent_1` and the
424  second one to :attr:`qs_ent_2`. The sequences are named according to the
425  mapped chain names and have views attached into :attr:`QSscoreEntity.ent`
426  of :attr:`qs_ent_1` and :attr:`qs_ent_2`.
427 
428  If :attr:`res_num_alignment` is False, each alignment is performed using a
429  global BLOSUM62-based alignment. Otherwise, the positions in the alignment
430  sequences are simply given by the residue number so that residues with
431  matching numbers are aligned.
432 
433  :getter: Computed on first use (cached)
434  :type: :class:`list` of :class:`~ost.seq.AlignmentHandle`
435  """
436  if self._alignments is None:
437  self._alignments = _GetMappedAlignments(self.qs_ent_1.ent,
438  self.qs_ent_2.ent,
439  self.chain_mapping,
440  self.res_num_alignment)
441  return self._alignments
442 
443  @property
444  def mapped_residues(self):
445  """Mapping of shared residues in :attr:`alignments`.
446 
447  :getter: Computed on first use (cached)
448  :type: :class:`dict` *mapped_residues[c1][r1] = r2* with:
449  *c1* = Chain name in first entity (= first sequence in aln),
450  *r1* = Residue number in first entity,
451  *r2* = Residue number in second entity
452  """
453  if self._mapped_residues is None:
454  self._mapped_residues = _GetMappedResidues(self.alignments)
455  return self._mapped_residues
456 
457  @property
458  def global_score(self):
459  """QS-score with penalties.
460 
461  The range of the score is between 0 (i.e. no interface residues are shared
462  between biounits) and 1 (i.e. the interfaces are identical).
463 
464  The global QS-score is computed applying penalties when interface residues
465  or entire chains are missing (i.e. anything that is not mapped in
466  :attr:`mapped_residues` / :attr:`chain_mapping`) in one of the biounits.
467 
468  :getter: Computed on first use (cached)
469  :type: :class:`float`
470  :raises: :class:`QSscoreError` if only one chain is mapped
471  """
472  if self._global_score is None:
473  self._ComputeScores()
474  return self._global_score
475 
476  @property
477  def best_score(self):
478  """QS-score without penalties.
479 
480  Like :attr:`global_score`, but neglecting additional residues or chains in
481  one of the biounits (i.e. the score is calculated considering only mapped
482  chains and residues).
483 
484  :getter: Computed on first use (cached)
485  :type: :class:`float`
486  :raises: :class:`QSscoreError` if only one chain is mapped
487  """
488  if self._best_score is None:
489  self._ComputeScores()
490  return self._best_score
491 
492  @property
493  def superposition(self):
494  """Superposition result based on shared CA atoms in :attr:`alignments`.
495 
496  The superposition can be used to map :attr:`QSscoreEntity.ent` of
497  :attr:`qs_ent_1` onto the one of :attr:`qs_ent_2`. Use
498  :func:`ost.geom.Invert` if you need the opposite transformation.
499 
500  :getter: Computed on first use (cached)
501  :type: :class:`ost.mol.alg.SuperpositionResult`
502  """
503  if self._superposition is None:
504  self._superposition = _GetQsSuperposition(self.alignments)
505  # report it
506  sup_rmsd = self._superposition.rmsd
507  cmp_view = self._superposition.view1
508  LogInfo('CA RMSD for %s aligned residues on %s chains: %.2f' \
509  % (cmp_view.residue_count, cmp_view.chain_count, sup_rmsd))
510  return self._superposition
511 
512  @property
513  def clustalw_bin(self):
514  """
515  Full path to ``clustalw`` or ``clustalw2`` executable to use for multiple
516  sequence alignments (unless :attr:`chain_mapping` is provided manually).
517 
518  :getter: Located in path on first use (cached)
519  :type: :class:`str`
520  """
521  if self._clustalw_bin is None:
522  self._clustalw_bin = settings.Locate(('clustalw', 'clustalw2'))
523  return self._clustalw_bin
524 
525  def GetOligoLDDTScorer(self, settings, penalize_extra_chains=True):
526  """
527  :return: :class:`OligoLDDTScorer` object, setup for this QS scoring problem.
528  :param settings: Passed to :class:`OligoLDDTScorer` constructor.
529  :param penalize_extra_chains: Passed to :class:`OligoLDDTScorer` constructor.
530  """
531  if penalize_extra_chains:
532  return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent,
533  self.alignments, self.calpha_only, settings,
534  True, self.chem_mapping)
535  else:
536  return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent,
537  self.alignments, self.calpha_only, settings, False)
538 
539 
540  ##############################################################################
541  # Class internal helpers (anything that doesnt easily work without this class)
542  ##############################################################################
543 
544  def _ComputeAlignedEntities(self):
545  """Fills cached ent_to_cm_1 and ent_to_cm_2."""
546  # get aligned residues via MSA
547  ev1, ev2 = _GetAlignedResidues(self.qs_ent_1, self.qs_ent_2,
548  self.chem_mapping,
550  self.clustalw_bin)
551  # extract new entities
552  self._ent_to_cm_1 = mol.CreateEntityFromView(ev1, True)
553  self._ent_to_cm_2 = mol.CreateEntityFromView(ev2, True)
554  # name them
555  self._ent_to_cm_1.SetName(self.qs_ent_1.GetName())
556  self._ent_to_cm_2.SetName(self.qs_ent_2.GetName())
557 
558  def _ComputeSymmetry(self):
559  """Fills cached symm_1 and symm_2."""
560  # get them
561  self._symm_1, self._symm_2 = \
562  _FindSymmetry(self.qs_ent_1, self.qs_ent_2, self.ent_to_cm_1,
563  self.ent_to_cm_2, self.chem_mapping)
564  # check that we have reasonable symmetries set (fallback: all chains)
565  if not _AreValidSymmetries(self._symm_1, self._symm_2):
566  self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1.chains)]
567  self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2.chains)]
568 
569  def _ComputeScores(self):
570  """Fills cached global_score and best_score."""
571  if len(self.chain_mapping) < 2:
572  raise QSscoreError("QS-score is not defined for monomers")
573  # get contacts
574  if self.calpha_only:
575  contacts_1 = self.qs_ent_1.contacts_ca
576  contacts_2 = self.qs_ent_2.contacts_ca
577  else:
578  contacts_1 = self.qs_ent_1.contacts
579  contacts_2 = self.qs_ent_2.contacts
580  # get scores
581  scores = _GetScores(contacts_1, contacts_2, self.mapped_residues,
582  self.chain_mapping)
583  self._best_score = scores[0]
584  self._global_score = scores[1]
585  # report scores
586  LogInfo('QSscore %s, %s: best: %.2f, global: %.2f' \
587  % (self.qs_ent_1.GetName(), self.qs_ent_2.GetName(),
588  self._best_score, self._global_score))
589 
590 
591 ###############################################################################
592 # Entity with cached entries for QS scoring
593 ###############################################################################
594 
595 class QSscoreEntity(object):
596  """Entity with cached entries for QS scoring.
597 
598  Any known / precomputed information can be filled into the appropriate
599  attribute here as long as they are labelled as read/write. Otherwise the
600  quantities are computed on first access and cached (lazy evaluation). The
601  heaviest load is expected when computing :attr:`contacts` and
602  :attr:`contacts_ca`.
603 
604  :param ent: Entity to be used for QS scoring. A copy of it will be processed.
605  :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
606 
607  .. attribute:: is_valid
608 
609  True, if successfully initialized. False, if input structure has no protein
610  chains with >= 20 residues.
611 
612  :type: :class:`bool`
613 
614  .. attribute:: original_name
615 
616  Name set for *ent* when object was created.
617 
618  :type: :class:`str`
619 
620  .. attribute:: ent
621 
622  Cleaned version of *ent* passed at construction. Hydrogens are removed, the
623  entity is processed with a :class:`~ost.conop.RuleBasedProcessor` and chains
624  listed in :attr:`removed_chains` have been removed. The name of this entity
625  might change during scoring (see :func:`GetName`). Otherwise, this will be
626  fixed.
627 
628  :type: :class:`~ost.mol.EntityHandle`
629 
630  .. attribute:: removed_chains
631 
632  Chains removed from *ent* passed at construction. These are ligand and water
633  chains as well as small (< 20 res.) peptides or chains with no amino acids
634  (determined by chem. type, which is set by rule based processor).
635 
636  :type: :class:`list` of :class:`str`
637 
638  .. attribute:: calpha_only
639 
640  Whether entity is CA-only (i.e. it has 0 CB atoms)
641 
642  :type: :class:`bool`
643  """
644  def __init__(self, ent):
645  # copy entity and process/clean it
646  self.original_name = ent.GetName()
647  ent = mol.CreateEntityFromView(ent.Select('ele!=H and aname!=HN'), True)
648  if not conop.GetDefaultLib():
649  raise RuntimeError("QSscore computation requires a compound library!")
650  pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
651  pr.Process(ent)
652  self.ent, self.removed_chains, self.calpha_only = _CleanInputEntity(ent)
653  # check if it's suitable for QS scoring
654  if self.ent.chain_count == 0:
655  LogError('Bad input file: ' + ent.GetName() + '. No chains left after '
656  'removing water, ligands and small peptides.')
657  self.is_valid = False
658  elif self.ent.chain_count == 1:
659  LogWarning('Structure ' + ent.GetName() + ' is a monomer.')
660  self.is_valid = True
661  else:
662  self.is_valid = True
663  # init cached stuff
664  self._ca_entity = None
665  self._ca_chains = None
666  self._alignments = {}
667  self._chem_groups = None
668  self._angles = {}
669  self._axis = {}
670  self._contacts = None
671  self._contacts_ca = None
672 
673  def GetName(self):
674  """Wrapper to :func:`~ost.mol.EntityHandle.GetName` of :attr:`ent`.
675  This is used to uniquely identify the entity while scoring. The name may
676  therefore change while :attr:`original_name` remains fixed.
677  """
678  # for duck-typing and convenience
679  return self.ent.GetName()
680 
681  def SetName(self, new_name):
682  """Wrapper to :func:`~ost.mol.EntityHandle.SetName` of :attr:`ent`.
683  Use this to change unique identifier while scoring (see :func:`GetName`).
684  """
685  # for duck-typing and convenience
686  self.ent.SetName(new_name)
687 
688  @property
689  def ca_entity(self):
690  """
691  Reduced representation of :attr:`ent` with only CA atoms.
692  This guarantees that each included residue has exactly one atom.
693 
694  :getter: Computed on first use (cached)
695  :type: :class:`~ost.mol.EntityHandle`
696  """
697  if self._ca_entity is None:
698  self._ca_entity = _GetCAOnlyEntity(self.ent)
699  return self._ca_entity
700 
701  @property
702  def ca_chains(self):
703  """
704  Map of chain names in :attr:`ent` to sequences with attached view to CA-only
705  chains (into :attr:`ca_entity`). Useful for alignments and superpositions.
706 
707  :getter: Computed on first use (cached)
708  :type: :class:`dict` (key = :class:`str`,
709  value = :class:`~ost.seq.SequenceHandle`)
710  """
711  if self._ca_chains is None:
712  self._ca_chains = dict()
713  ca_entity = self.ca_entity
714  for ch in ca_entity.chains:
715  self._ca_chains[ch.name] = seq.SequenceFromChain(ch.name, ch)
716  return self._ca_chains
717 
718  def GetAlignment(self, c1, c2):
719  """Get sequence alignment of chain *c1* with chain *c2*.
720  Computed on first use based on :attr:`ca_chains` (cached).
721 
722  :param c1: Chain name for first chain to align.
723  :type c1: :class:`str`
724  :param c2: Chain name for second chain to align.
725  :type c2: :class:`str`
726  :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
727  """
728  if (c1,c2) not in self._alignments:
729  ca_chains = self.ca_chains
730  self._alignments[(c1,c2)] = _AlignAtomSeqs(ca_chains[c1], ca_chains[c2])
731  return self._alignments[(c1,c2)]
732 
733  @property
734  def chem_groups(self):
735  """
736  Intra-complex group of chemically identical (seq. id. > 95%) polypeptide
737  chains as extracted from :attr:`ca_chains`. First chain in group is the one
738  with the longest sequence.
739 
740  :getter: Computed on first use (cached)
741  :type: :class:`list` of :class:`list` of :class:`str` (chain names)
742  """
743  if self._chem_groups is None:
744  self._chem_groups = _GetChemGroups(self, 95)
745  LogInfo('Chemically equivalent chain-groups in %s: %s' \
746  % (self.GetName(), str(self._chem_groups)))
747  return self._chem_groups
748 
749  def GetAngles(self, c1, c2):
750  """Get Euler angles from superposition of chain *c1* with chain *c2*.
751  Computed on first use based on :attr:`ca_chains` (cached).
752 
753  :param c1: Chain name for first chain to superpose.
754  :type c1: :class:`str`
755  :param c2: Chain name for second chain to superpose.
756  :type c2: :class:`str`
757  :return: 3 Euler angles (may contain nan if something fails).
758  :rtype: :class:`numpy.array`
759  """
760  if (c1,c2) not in self._angles:
761  self._GetSuperposeData(c1, c2)
762  return self._angles[(c1,c2)]
763 
764  def GetAxis(self, c1, c2):
765  """Get axis of symmetry from superposition of chain *c1* with chain *c2*.
766  Computed on first use based on :attr:`ca_chains` (cached).
767 
768  :param c1: Chain name for first chain to superpose.
769  :type c1: :class:`str`
770  :param c2: Chain name for second chain to superpose.
771  :type c2: :class:`str`
772  :return: Rotational axis (may contain nan if something fails).
773  :rtype: :class:`numpy.array`
774  """
775  if (c1,c2) not in self._axis:
776  self._GetSuperposeData(c1, c2)
777  return self._axis[(c1,c2)]
778 
779  @property
780  def contacts(self):
781  """
782  Connectivity dictionary (**read/write**).
783  As given by :func:`GetContacts` with *calpha_only* = False on :attr:`ent`.
784 
785  :getter: Computed on first use (cached)
786  :setter: Uses :func:`FilterContacts` to ensure that we only keep contacts
787  for chains in the cleaned entity.
788  :type: See return type of :func:`GetContacts`
789  """
790  if self._contacts is None:
791  self._contacts = GetContacts(self.ent, False)
792  return self._contacts
793 
794  @contacts.setter
795  def contacts(self, new_contacts):
796  chain_names = set([ch.name for ch in self.ent.chains])
797  self._contacts = FilterContacts(new_contacts, chain_names)
798 
799  @property
800  def contacts_ca(self):
801  """
802  CA-only connectivity dictionary (**read/write**).
803  Like :attr:`contacts` but with *calpha_only* = True in :func:`GetContacts`.
804  """
805  if self._contacts_ca is None:
806  self._contacts_ca = GetContacts(self.ent, True)
807  return self._contacts_ca
808 
809  @contacts_ca.setter
810  def contacts_ca(self, new_contacts):
811  chain_names = set([ch.name for ch in self.ent.chains])
812  self._contacts_ca = FilterContacts(new_contacts, chain_names)
813 
814  ##############################################################################
815  # Class internal helpers (anything that doesnt easily work without this class)
816  ##############################################################################
817 
818  def _GetSuperposeData(self, c1, c2):
819  """Fill _angles and _axis from superposition of CA chains of c1 and c2."""
820  # get aligned views (must contain identical numbers of atoms!)
821  aln = self.GetAlignment(c1, c2)
822  if not aln:
823  # fallback for non-aligned stuff (nan)
824  self._angles[(c1,c2)] = np.empty(3) * np.nan
825  self._axis[(c1,c2)] = np.empty(3) * np.nan
826  return
827  v1, v2 = seq.ViewsFromAlignment(aln)
828  if v1.atom_count < 3:
829  # fallback for non-aligned stuff (nan)
830  self._angles[(c1,c2)] = np.empty(3) * np.nan
831  self._axis[(c1,c2)] = np.empty(3) * np.nan
832  return
833  # get transformation
834  sup_res = mol.alg.SuperposeSVD(v1, v2, apply_transform=False)
835  Rt = sup_res.transformation
836  # extract angles
837  a,b,c = _GetAngles(Rt)
838  self._angles[(c1,c2)] = np.asarray([a,b,c])
839  # extract axis of symmetry
840  R3 = geom.Rotation3(Rt.ExtractRotation())
841  self._axis[(c1,c2)] = np.asarray(R3.GetRotationAxis().data)
842 
843 ###############################################################################
844 # Contacts computations
845 ###############################################################################
846 
847 def FilterContacts(contacts, chain_names):
848  """Filter contacts to contain only contacts for chains in *chain_names*.
849 
850  :param contacts: Connectivity dictionary as produced by :func:`GetContacts`.
851  :type contacts: :class:`dict`
852  :param chain_names: Chain names to keep.
853  :type chain_names: :class:`list` or (better) :class:`set`
854  :return: New connectivity dictionary (format as in :func:`GetContacts`)
855  :rtype: :class:`dict`
856  """
857  # create new dict with subset
858  filtered_contacts = dict()
859  for c1 in contacts:
860  if c1 in chain_names:
861  new_contacts = dict()
862  for c2 in contacts[c1]:
863  if c2 in chain_names:
864  new_contacts[c2] = contacts[c1][c2]
865  # avoid adding empty dicts
866  if new_contacts:
867  filtered_contacts[c1] = new_contacts
868  return filtered_contacts
869 
870 def GetContacts(entity, calpha_only, dist_thr=12.0):
871  """Get inter-chain contacts of a macromolecular entity.
872 
873  Contacts are pairs of residues within a given distance belonging to different
874  chains. They are stored once per pair and include the CA/CB-CA/CB distance.
875 
876  :param entity: An entity to check connectivity for.
877  :type entity: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
878  :param calpha_only: If True, we only consider CA-CA distances. Else, we use CB
879  unless the residue is a GLY.
880  :type calpha_only: :class:`bool`
881  :param dist_thr: Maximal CA/CB-CA/CB distance to be considered in contact.
882  :type dist_thr: :class:`float`
883  :return: A connectivity dictionary. A pair of residues with chain names
884  *ch_name1* & *ch_name2* (*ch_name1* < *ch_name2*), residue numbers
885  *res_num1* & *res_num2* and distance *dist* (<= *dist_thr*) are
886  stored as *result[ch_name1][ch_name2][res_num1][res_num2]* = *dist*.
887  :rtype: :class:`dict`
888  """
889  # get ent copy to search on
890  if calpha_only:
891  ev = entity.Select("aname=CA")
892  else:
893  ev = entity.Select("(rname=GLY and aname=CA) or aname=CB")
894  ent = mol.CreateEntityFromView(ev, True)
895  # search all vs all
896  contacts = dict()
897  for atom in ent.atoms:
898  ch_name1 = atom.chain.name
899  res_num1 = atom.residue.number.num
900  close_atoms = ent.FindWithin(atom.pos, dist_thr)
901  for close_atom in close_atoms:
902  ch_name2 = close_atom.chain.name
903  if ch_name2 > ch_name1:
904  res_num2 = close_atom.residue.number.num
905  dist = geom.Distance(atom.pos, close_atom.pos)
906  # add to contacts
907  if ch_name1 not in contacts:
908  contacts[ch_name1] = dict()
909  if ch_name2 not in contacts[ch_name1]:
910  contacts[ch_name1][ch_name2] = dict()
911  if res_num1 not in contacts[ch_name1][ch_name2]:
912  contacts[ch_name1][ch_name2][res_num1] = dict()
913  contacts[ch_name1][ch_name2][res_num1][res_num2] = round(dist, 3)
914  # DONE
915  return contacts
916 
917 ###############################################################################
918 # Oligo-lDDT scores
919 ###############################################################################
920 
921 class OligoLDDTScorer(object):
922  """Helper class to calculate oligomeric lDDT scores.
923 
924  This class can be used independently, but commonly it will be created by
925  calling :func:`QSscorer.GetOligoLDDTScorer`.
926 
927  .. note::
928 
929  By construction, lDDT scores are not symmetric and hence it matters which
930  structure is the reference (:attr:`ref`) and which one is the model
931  (:attr:`mdl`). Extra residues in the model are generally not considered.
932  Extra chains in both model and reference can be considered by setting the
933  :attr:`penalize_extra_chains` flag to True.
934 
935  :param ref: Sets :attr:`ref`
936  :param mdl: Sets :attr:`mdl`
937  :param alignments: Sets :attr:`alignments`
938  :param calpha_only: Sets :attr:`calpha_only`
939  :param settings: Sets :attr:`settings`
940  :param penalize_extra_chains: Sets :attr:`penalize_extra_chains`
941  :param chem_mapping: Sets :attr:`chem_mapping`. Must be given if
942  *penalize_extra_chains* is True.
943 
944  .. attribute:: ref
945  mdl
946 
947  Full reference/model entity to be scored. The entity must contain all chains
948  mapped in :attr:`alignments` and may also contain additional ones which are
949  considered if :attr:`penalize_extra_chains` is True.
950 
951  :type: :class:`~ost.mol.EntityHandle`
952 
953  .. attribute:: alignments
954 
955  One alignment for each mapped chain of :attr:`ref`/:attr:`mdl` as defined in
956  :attr:`QSscorer.alignments`. The first sequence of each alignment belongs to
957  :attr:`ref` and the second one to :attr:`mdl`. Sequences must have sequence
958  naming and attached views as defined in :attr:`QSscorer.alignments`.
959 
960  :type: :class:`list` of :class:`~ost.seq.AlignmentHandle`
961 
962  .. attribute:: calpha_only
963 
964  If True, restricts lDDT score to CA only.
965 
966  :type: :class:`bool`
967 
968  .. attribute:: settings
969 
970  Settings to use for lDDT scoring.
971 
972  :type: :class:`~ost.mol.alg.lDDTSettings`
973 
974  .. attribute:: penalize_extra_chains
975 
976  If True, extra chains in both :attr:`ref` and :attr:`mdl` will penalize the
977  lDDT scores.
978 
979  :type: :class:`bool`
980 
981  .. attribute:: chem_mapping
982 
983  Inter-complex mapping of chemical groups as defined in
984  :attr:`QSscorer.chem_mapping`. Used to find "chem-mapped" chains in
985  :attr:`ref` for unmapped chains in :attr:`mdl` when penalizing scores.
986  Each unmapped model chain can add extra reference-contacts according to the
987  average total contacts of each single "chem-mapped" reference chain. If
988  there is no "chem-mapped" reference chain, a warning is shown and the model
989  chain is ignored.
990 
991 
992  Only relevant if :attr:`penalize_extra_chains` is True.
993 
994  :type: :class:`dict` with key = :class:`tuple` of chain names in
995  :attr:`ref` and value = :class:`tuple` of chain names in
996  :attr:`mdl`.
997  """
998 
999  # NOTE: one could also allow computation of both penalized and unpenalized
1000  # in same object -> must regenerate lddt_ref / lddt_mdl though
1001 
1002  def __init__(self, ref, mdl, alignments, calpha_only, settings,
1003  penalize_extra_chains=False, chem_mapping=None):
1004  # sanity checks
1005  if chem_mapping is None and penalize_extra_chains:
1006  raise RuntimeError("Must provide chem_mapping when requesting penalty "
1007  "for extra chains!")
1008  if not penalize_extra_chains:
1009  # warn for unmapped model chains
1010  unmapped_mdl_chains = self._GetUnmappedMdlChains(mdl, alignments)
1011  if unmapped_mdl_chains:
1012  LogWarning('MODEL contains chains unmapped to REFERENCE, '
1013  'lDDT is not considering MODEL chains %s' \
1014  % str(list(unmapped_mdl_chains)))
1015  # warn for unmapped reference chains
1016  ref_chains = set(ch.name for ch in ref.chains)
1017  mapped_ref_chains = set(aln.GetSequence(0).GetName() for aln in alignments)
1018  unmapped_ref_chains = (ref_chains - mapped_ref_chains)
1019  if unmapped_ref_chains:
1020  LogWarning('REFERENCE contains chains unmapped to MODEL, '
1021  'lDDT is not considering REFERENCE chains %s' \
1022  % str(list(unmapped_ref_chains)))
1023  # prepare fields
1024  self.ref = ref
1025  self.mdl = mdl
1026  self.alignments = alignments
1027  self.calpha_only = calpha_only
1028  self.settings = settings
1029  self.penalize_extra_chains = penalize_extra_chains
1030  self.chem_mapping = chem_mapping
1031  self._sc_lddt = None
1032  self._oligo_lddt = None
1033  self._weighted_lddt = None
1034  self._lddt_ref = None
1035  self._lddt_mdl = None
1036  self._oligo_lddt_scorer = None
1037  self._mapped_lddt_scorers = None
1038  self._ref_scorers = None
1039  self._model_penalty = None
1040 
1041  @property
1042  def oligo_lddt(self):
1043  """Oligomeric lDDT score.
1044 
1045  The score is computed as conserved contacts divided by the total contacts
1046  in the reference using the :attr:`oligo_lddt_scorer`, which uses the full
1047  complex as reference/model structure. If :attr:`penalize_extra_chains` is
1048  True, the reference/model complexes contain all chains (otherwise only the
1049  mapped ones) and additional contacts are added to the reference's total
1050  contacts for unmapped model chains according to the :attr:`chem_mapping`.
1051 
1052  The main difference with :attr:`weighted_lddt` is that the lDDT scorer
1053  "sees" the full complex here (incl. inter-chain contacts), while the
1054  weighted single chain score looks at each chain separately.
1055 
1056  :getter: Computed on first use (cached)
1057  :type: :class:`float`
1058  """
1059  if self._oligo_lddt is None:
1060  LogInfo('Reference %s has: %s chains' \
1061  % (self.ref.GetName(), self.ref.chain_count))
1062  LogInfo('Model %s has: %s chains' \
1063  % (self.mdl.GetName(), self.mdl.chain_count))
1064 
1065  # score with or w/o extra-chain penalty
1066  if self.penalize_extra_chains:
1067  denominator = self.oligo_lddt_scorer.total_contacts
1068  denominator += self._GetModelPenalty()
1069  if denominator > 0:
1070  oligo_lddt = self.oligo_lddt_scorer.conserved_contacts \
1071  / float(denominator)
1072  else:
1073  oligo_lddt = 0.0
1074  else:
1075  oligo_lddt = self.oligo_lddt_scorer.global_score
1076  self._oligo_lddt = oligo_lddt
1077  return self._oligo_lddt
1078 
1079  @property
1080  def weighted_lddt(self):
1081  """Weighted average of single chain lDDT scores.
1082 
1083  The score is computed as a weighted average of single chain lDDT scores
1084  (see :attr:`sc_lddt_scorers`) using the total contacts of each single
1085  reference chain as weights. If :attr:`penalize_extra_chains` is True,
1086  unmapped chains are added with a 0 score and total contacts taken from
1087  the actual reference chains or (for unmapped model chains) using the
1088  :attr:`chem_mapping`.
1089 
1090  See :attr:`oligo_lddt` for a comparison of the two scores.
1091 
1092  :getter: Computed on first use (cached)
1093  :type: :class:`float`
1094  """
1095  if self._weighted_lddt is None:
1096  scores = [s.global_score for s in self.sc_lddt_scorers]
1097  weights = [s.total_contacts for s in self.sc_lddt_scorers]
1098  nominator = sum([s * w for s, w in zip(scores, weights)])
1099  if self.penalize_extra_chains:
1100  ref_scorers = self._GetRefScorers()
1101  denominator = sum(s.total_contacts for s in ref_scorers.values())
1102  denominator += self._GetModelPenalty()
1103  else:
1104  denominator = sum(weights)
1105  if denominator > 0:
1106  self._weighted_lddt = nominator / float(denominator)
1107  else:
1108  self._weighted_lddt = 0.0
1109  return self._weighted_lddt
1110 
1111  @property
1112  def lddt_ref(self):
1113  """The reference entity used for oligomeric lDDT scoring
1114  (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`).
1115 
1116  Since the lDDT computation requires a single chain with mapped residue
1117  numbering, all chains of :attr:`ref` are appended into a single chain X with
1118  unique residue numbers according to the column-index in the alignment. The
1119  alignments are in the same order as they appear in :attr:`alignments`.
1120  Additional residues are appended at the end of the chain with unique residue
1121  numbers. Unmapped chains are only added if :attr:`penalize_extra_chains` is
1122  True. Only CA atoms are considered if :attr:`calpha_only` is True.
1123 
1124  :getter: Computed on first use (cached)
1125  :type: :class:`~ost.mol.EntityHandle`
1126  """
1127  if self._lddt_ref is None:
1128  self._PrepareOligoEntities()
1129  return self._lddt_ref
1130 
1131  @property
1132  def lddt_mdl(self):
1133  """The model entity used for oligomeric lDDT scoring
1134  (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`).
1135 
1136  Like :attr:`lddt_ref`, this is a single chain X containing all chains of
1137  :attr:`mdl`. The residue numbers match the ones in :attr:`lddt_ref` where
1138  aligned and have unique numbers for additional residues.
1139 
1140  :getter: Computed on first use (cached)
1141  :type: :class:`~ost.mol.EntityHandle`
1142  """
1143  if self._lddt_mdl is None:
1144  self._PrepareOligoEntities()
1145  return self._lddt_mdl
1146 
1147  @property
1149  """lDDT Scorer object for :attr:`lddt_ref` and :attr:`lddt_mdl`.
1150 
1151  :getter: Computed on first use (cached)
1152  :type: :class:`~ost.mol.alg.lDDTScorer`
1153  """
1154  if self._oligo_lddt_scorer is None:
1156  references=[self.lddt_ref.Select("")],
1157  model=self.lddt_mdl.Select(""),
1158  settings=self.settings)
1159  return self._oligo_lddt_scorer
1160 
1161  @property
1163  """List of scorer objects for each chain mapped in :attr:`alignments`.
1164 
1165  :getter: Computed on first use (cached)
1166  :type: :class:`list` of :class:`MappedLDDTScorer`
1167  """
1168  if self._mapped_lddt_scorers is None:
1169  self._mapped_lddt_scorers = list()
1170  for aln in self.alignments:
1171  mapped_lddt_scorer = MappedLDDTScorer(aln, self.calpha_only,
1172  self.settings)
1173  self._mapped_lddt_scorers.append(mapped_lddt_scorer)
1174  return self._mapped_lddt_scorers
1175 
1176  @property
1177  def sc_lddt_scorers(self):
1178  """List of lDDT scorer objects extracted from :attr:`mapped_lddt_scorers`.
1179 
1180  :type: :class:`list` of :class:`~ost.mol.alg.lDDTScorer`
1181  """
1182  return [mls.lddt_scorer for mls in self.mapped_lddt_scorers]
1183 
1184  @property
1185  def sc_lddt(self):
1186  """List of global scores extracted from :attr:`sc_lddt_scorers`.
1187 
1188  If scoring for a mapped chain fails, an error is displayed and a score of 0
1189  is assigned.
1190 
1191  :getter: Computed on first use (cached)
1192  :type: :class:`list` of :class:`float`
1193  """
1194  if self._sc_lddt is None:
1195  self._sc_lddt = list()
1196  for lddt_scorer in self.sc_lddt_scorers:
1197  try:
1198  self._sc_lddt.append(lddt_scorer.global_score)
1199  except Exception as ex:
1200  LogError('Single chain lDDT failed:', str(ex))
1201  self._sc_lddt.append(0.0)
1202  return self._sc_lddt
1203 
1204  ##############################################################################
1205  # Class internal helpers
1206  ##############################################################################
1207 
1208  def _PrepareOligoEntities(self):
1209  # simple wrapper to avoid code duplication
1210  self._lddt_ref, self._lddt_mdl = _MergeAlignedChains(
1211  self.alignments, self.ref, self.mdl, self.calpha_only,
1212  self.penalize_extra_chains)
1213 
1214  @staticmethod
1215  def _GetUnmappedMdlChains(mdl, alignments):
1216  # assume model is second sequence in alignment and is named by chain
1217  mdl_chains = set(ch.name for ch in mdl.chains)
1218  mapped_mdl_chains = set(aln.GetSequence(1).GetName() for aln in alignments)
1219  return (mdl_chains - mapped_mdl_chains)
1220 
1221  def _GetRefScorers(self):
1222  # single chain lddt scorers for each reference chain (key = chain name)
1223  if self._ref_scorers is None:
1224  # collect from mapped_lddt_scorers
1225  ref_scorers = dict()
1226  for mapped_lddt_scorer in self.mapped_lddt_scorers:
1227  ref_ch_name = mapped_lddt_scorer.reference_chain_name
1228  ref_scorers[ref_ch_name] = mapped_lddt_scorer.lddt_scorer
1229  # add new ones where needed
1230  for ch in self.ref.chains:
1231  if ch.name not in ref_scorers:
1232  if self.calpha_only:
1233  ref_chain = ch.Select('aname=CA')
1234  else:
1235  ref_chain = ch.Select('')
1236  ref_scorers[ch.name] = lDDTScorer(
1237  references=[ref_chain],
1238  model=ref_chain,
1239  settings=self.settings)
1240  # store in cache
1241  self._ref_scorers = ref_scorers
1242  # fetch from cache
1243  return self._ref_scorers
1244 
1245  def _GetModelPenalty(self):
1246  # extra value to add to total number of distances for extra model chains
1247  # -> estimated from chem-mapped reference chains
1248  if self._model_penalty is None:
1249  # sanity check
1250  if self.chem_mapping is None:
1251  raise RuntimeError("Must provide chem_mapping when requesting penalty "
1252  "for extra model chains!")
1253  # get cached ref_scorers
1254  ref_scorers = self._GetRefScorers()
1255  # get unmapped model chains
1256  unmapped_mdl_chains = self._GetUnmappedMdlChains(self.mdl, self.alignments)
1257  # map extra chains to ref. chains
1258  model_penalty = 0
1259  for ch_name_mdl in sorted(unmapped_mdl_chains):
1260  # get penalty for chain
1261  cur_penalty = None
1262  for cm_ref, cm_mdl in self.chem_mapping.iteritems():
1263  if ch_name_mdl in cm_mdl:
1264  # penalize by an average of the chem. mapped ref. chains
1265  cur_penalty = 0
1266  for ch_name_ref in cm_ref:
1267  # assumes that total_contacts is cached (for speed)
1268  cur_penalty += ref_scorers[ch_name_ref].total_contacts
1269  cur_penalty /= float(len(cm_ref))
1270  break
1271  # report penalty
1272  if cur_penalty is None:
1273  LogWarning('Extra MODEL chain %s could not be chemically mapped to '
1274  'any chain in REFERENCE, lDDT cannot consider it!' \
1275  % ch_name_mdl)
1276  else:
1277  LogScript('Extra MODEL chain %s added to lDDT score by considering '
1278  'chemically mapped chains in REFERENCE.' % ch_name_mdl)
1279  model_penalty += cur_penalty
1280  # store in cache
1281  self._model_penalty = model_penalty
1282  # fetch from cache
1283  return self._model_penalty
1284 
1285 
1286 class MappedLDDTScorer(object):
1287  """A simple class to calculate a single-chain lDDT score on a given chain to
1288  chain mapping as extracted from :class:`OligoLDDTScorer`.
1289 
1290  :param alignment: Sets :attr:`alignment`
1291  :param calpha_only: Sets :attr:`calpha_only`
1292  :param settings: Sets :attr:`settings`
1293 
1294  .. attribute:: alignment
1295 
1296  Alignment with two sequences named according to the mapped chains and with
1297  views attached to both sequences (e.g. one of the items of
1298  :attr:`QSscorer.alignments`).
1299 
1300  The first sequence is assumed to be the reference and the second one the
1301  model. Since the lDDT score is not symmetric (extra residues in model are
1302  ignored), the order is important.
1303 
1304  :type: :class:`~ost.seq.AlignmentHandle`
1305 
1306  .. attribute:: calpha_only
1307 
1308  If True, restricts lDDT score to CA only.
1309 
1310  :type: :class:`bool`
1311 
1312  .. attribute:: settings
1313 
1314  Settings to use for lDDT scoring.
1315 
1316  :type: :class:`~ost.mol.alg.lDDTSettings`
1317 
1318  .. attribute:: lddt_scorer
1319 
1320  lDDT Scorer object for the given chains.
1321 
1322  :type: :class:`~ost.mol.alg.lDDTScorer`
1323 
1324  .. attribute:: reference_chain_name
1325 
1326  Chain name of the reference.
1327 
1328  :type: :class:`str`
1329 
1330  .. attribute:: model_chain_name
1331 
1332  Chain name of the model.
1333 
1334  :type: :class:`str`
1335  """
1336  def __init__(self, alignment, calpha_only, settings):
1337  # prepare fields
1338  self.alignment = alignment
1339  self.calpha_only = calpha_only
1340  self.settings = settings
1341  self.lddt_scorer = None # set in _InitScorer
1342  self.reference_chain_name = alignment.sequences[0].name
1343  self.model_chain_name = alignment.sequences[1].name
1344  self._old_number_label = "old_num"
1345  self._extended_alignment = None # set in _InitScorer
1346  # initialize lDDT scorer
1347  self._InitScorer()
1348 
1350  """
1351  :return: Scores for each residue
1352  :rtype: :class:`list` of :class:`dict` with one item for each residue
1353  existing in model and reference:
1354 
1355  - "residue_number": Residue number in reference chain
1356  - "residue_name": Residue name in reference chain
1357  - "lddt": local lDDT
1358  - "conserved_contacts": number of conserved contacts
1359  - "total_contacts": total number of contacts
1360  """
1361  scores = list()
1362  assigned_residues = list()
1363  # Make sure the score is calculated
1364  self.lddt_scorer.global_score
1365  for col in self._extended_alignment:
1366  if col[0] != "-" and col.GetResidue(3).IsValid():
1367  ref_res = col.GetResidue(0)
1368  mdl_res = col.GetResidue(1)
1369  ref_res_renum = col.GetResidue(2)
1370  mdl_res_renum = col.GetResidue(3)
1371  if ref_res.one_letter_code != ref_res_renum.one_letter_code:
1372  raise RuntimeError("Reference residue name mapping inconsistent: %s != %s" %
1373  (ref_res.one_letter_code,
1374  ref_res_renum.one_letter_code))
1375  if mdl_res.one_letter_code != mdl_res_renum.one_letter_code:
1376  raise RuntimeError("Model residue name mapping inconsistent: %s != %s" %
1377  (mdl_res.one_letter_code,
1378  mdl_res_renum.one_letter_code))
1379  if ref_res.GetNumber().num != ref_res_renum.GetIntProp(self._old_number_label):
1380  raise RuntimeError("Reference residue number mapping inconsistent: %s != %s" %
1381  (ref_res.GetNumber().num,
1382  ref_res_renum.GetIntProp(self._old_number_label)))
1383  if mdl_res.GetNumber().num != mdl_res_renum.GetIntProp(self._old_number_label):
1384  raise RuntimeError("Model residue number mapping inconsistent: %s != %s" %
1385  (mdl_res.GetNumber().num,
1386  mdl_res_renum.GetIntProp(self._old_number_label)))
1387  if ref_res.qualified_name in assigned_residues:
1388  raise RuntimeError("Duplicated residue in reference: " %
1389  (ref_res.qualified_name))
1390  else:
1391  assigned_residues.append(ref_res.qualified_name)
1392  # check if property there (may be missing for CA-only)
1393  if mdl_res_renum.HasProp(self.settings.label):
1394  scores.append({
1395  "residue_number": ref_res.GetNumber().num,
1396  "residue_name": ref_res.name,
1397  "lddt": mdl_res_renum.GetFloatProp(self.settings.label),
1398  "conserved_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_conserved"),
1399  "total_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_total")})
1400  return scores
1401 
1402  ##############################################################################
1403  # Class internal helpers (anything that doesnt easily work without this class)
1404  ##############################################################################
1405 
1406  def _InitScorer(self):
1407  # Use copy of alignment (extended by 2 extra sequences for renumbering)
1408  aln = self.alignment.Copy()
1409  # Get chains and renumber according to alignment (for lDDT)
1410  reference = Renumber(
1411  aln.GetSequence(0),
1412  old_number_label=self._old_number_label).CreateFullView()
1413  refseq = seq.CreateSequence(
1414  "reference_renumbered",
1415  aln.GetSequence(0).GetString())
1416  refseq.AttachView(reference)
1417  aln.AddSequence(refseq)
1418  model = Renumber(
1419  aln.GetSequence(1),
1420  old_number_label=self._old_number_label).CreateFullView()
1421  modelseq = seq.CreateSequence(
1422  "model_renumbered",
1423  aln.GetSequence(1).GetString())
1424  modelseq.AttachView(model)
1425  aln.AddSequence(modelseq)
1426  # Filter to CA-only if desired (done after AttachView to not mess it up)
1427  if self.calpha_only:
1428  self.lddt_scorer = lDDTScorer(
1429  references=[reference.Select('aname=CA')],
1430  model=model.Select('aname=CA'),
1431  settings=self.settings)
1432  else:
1433  self.lddt_scorer = lDDTScorer(
1434  references=[reference],
1435  model=model,
1436  settings=self.settings)
1437  # Store alignment for later
1438  self._extended_alignment = aln
1439 
1440 ###############################################################################
1441 # HELPERS
1442 ###############################################################################
1443 
1444 # general
1445 
1446 def _AlignAtomSeqs(seq_1, seq_2):
1447  """
1448  :type seq_1: :class:`ost.seq.SequenceHandle`
1449  :type seq_2: :class:`ost.seq.SequenceHandle`
1450  :return: Alignment of two sequences using a global alignment. Views attached
1451  to the input sequences will remain attached in the aln.
1452  :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
1453  """
1454  # NOTE: If the two sequence have a greatly different length
1455  # a local alignment could be a better choice...
1456  aln = None
1457  alns = seq.alg.GlobalAlign(seq_1, seq_2, seq.alg.BLOSUM62)
1458  if alns: aln = alns[0]
1459  if not aln:
1460  LogWarning('Failed to align %s to %s' % (seq_1.name, seq_2.name))
1461  LogWarning('%s: %s' % (seq_1.name, seq_1.string))
1462  LogWarning('%s: %s' % (seq_2.name, seq_2.string))
1463  return aln
1464 
1465 def _FixSelectChainNames(ch_names):
1466  """
1467  :return: String to be used with Select(cname=<RETURN>). Takes care of joining
1468  and putting quotation marks where needed.
1469  :rtype: :class:`str`
1470  :param ch_names: Some iterable list of chain names (:class:`str` items).
1471  """
1472  return ','.join(mol.QueryQuoteName(ch_name) for ch_name in ch_names)
1473 
1474 # QS entity
1475 
1476 def _CleanInputEntity(ent):
1477  """
1478  :param ent: The OST entity to be cleaned.
1479  :type ent: :class:`EntityHandle` or :class:`EntityView`
1480  :return: A tuple of 3 items: :attr:`QSscoreEntity.ent`,
1481  :attr:`QSscoreEntity.removed_chains`,
1482  :attr:`QSscoreEntity.calpha_only`
1483  """
1484  # find chains to remove
1485  removed_chains = []
1486  for ch in ent.chains:
1487  # we remove chains if they are small-peptides or if the contain no aa
1488  # or if they contain only unknown or modified residues
1489  if ch.name in ['-', '_'] \
1490  or ch.residue_count < 20 \
1491  or not any(r.chem_type.IsAminoAcid() for r in ch.residues) \
1492  or not (set(r.one_letter_code for r in ch.residues) - {'?', 'X'}):
1493  removed_chains.append(ch.name)
1494 
1495  # remove them from *ent*
1496  if removed_chains:
1497  view = ent.Select('cname!=%s' % _FixSelectChainNames(set(removed_chains)))
1498  ent_new = mol.CreateEntityFromView(view, True)
1499  ent_new.SetName(ent.GetName())
1500  else:
1501  ent_new = ent
1502 
1503  # check if CA only
1504  calpha_only = False
1505  if ent_new.atom_count > 0 and ent_new.Select('aname=CB').atom_count == 0:
1506  LogInfo('Structure %s is a CA only structure!' % ent_new.GetName())
1507  calpha_only = True
1508 
1509  # report and return
1510  if removed_chains:
1511  LogInfo('Chains removed from %s: %s' \
1512  % (ent_new.GetName(), ''.join(removed_chains)))
1513  LogInfo('Chains in %s: %s' \
1514  % (ent_new.GetName(), ''.join([c.name for c in ent_new.chains])))
1515  return ent_new, removed_chains, calpha_only
1516 
1517 def _GetCAOnlyEntity(ent):
1518  """
1519  :param ent: Entity to process
1520  :type ent: :class:`EntityHandle` or :class:`EntityView`
1521  :return: New entity with only CA and only one atom per residue
1522  (see :attr:`QSscoreEntity.ca_entity`)
1523  """
1524  # cook up CA only view (diff from Select = guaranteed 1 atom per residue)
1525  ca_view = ent.CreateEmptyView()
1526  # add chain by chain
1527  for res in ent.residues:
1528  ca_atom = res.FindAtom("CA")
1529  if ca_atom.IsValid():
1530  ca_view.AddAtom(ca_atom)
1531  # finalize
1532  return mol.CreateEntityFromView(ca_view, False)
1533 
1534 def _GetChemGroups(qs_ent, seqid_thr=95.):
1535  """
1536  :return: Intra-complex group of chemically identical polypeptide chains
1537  (see :attr:`QSscoreEntity.chem_groups`)
1538 
1539  :param qs_ent: Entity to process
1540  :type qs_ent: :class:`QSscoreEntity`
1541  :param seqid_thr: Threshold used to decide when two chains are identical.
1542  95 percent tolerates the few mutations crystallographers
1543  like to do.
1544  :type seqid_thr: :class:`float`
1545  """
1546  # get data from qs_ent
1547  ca_chains = qs_ent.ca_chains
1548  chain_names = sorted(ca_chains.keys())
1549  # get pairs of identical chains
1550  # NOTE: this scales quadratically with number of chains and may be optimized
1551  # -> one could merge it with "merge transitive pairs" below...
1552  id_seqs = []
1553  for ch_1, ch_2 in itertools.combinations(chain_names, 2):
1554  aln = qs_ent.GetAlignment(ch_1, ch_2)
1555  if aln and seq.alg.SequenceIdentity(aln) > seqid_thr:
1556  id_seqs.append((ch_1, ch_2))
1557  # trivial case: no matching pairs
1558  if not id_seqs:
1559  return [[name] for name in chain_names]
1560 
1561  # merge transitive pairs
1562  groups = []
1563  for ch_1, ch_2 in id_seqs:
1564  found = False
1565  for g in groups:
1566  if ch_1 in g or ch_2 in g:
1567  found = True
1568  g.add(ch_1)
1569  g.add(ch_2)
1570  if not found:
1571  groups.append(set([ch_1, ch_2]))
1572  # sort internally based on sequence length
1573  chem_groups = []
1574  for g in groups:
1575  ranked_g = sorted([(-ca_chains[ch].length, ch) for ch in g])
1576  chem_groups.append([ch for _,ch in ranked_g])
1577  # add other dissimilar chains
1578  for ch in chain_names:
1579  if not any(ch in g for g in chem_groups):
1580  chem_groups.append([ch])
1581 
1582  return chem_groups
1583 
1584 def _GetAngles(Rt):
1585  """Computes the Euler angles given a transformation matrix.
1586 
1587  :param Rt: Rt operator.
1588  :type Rt: :class:`ost.geom.Mat4`
1589  :return: A :class:`tuple` of angles for each axis (x,y,z)
1590  """
1591  rot = np.asmatrix(Rt.ExtractRotation().data).reshape(3,3)
1592  tx = np.arctan2(rot[2,1], rot[2,2])
1593  if tx < 0:
1594  tx += 2*np.pi
1595  ty = np.arctan2(rot[2,0], np.sqrt(rot[2,1]**2 + rot[2,2]**2))
1596  if ty < 0:
1597  ty += 2*np.pi
1598  tz = np.arctan2(rot[1,0], rot[0,0])
1599  if tz < 0:
1600  tz += 2*np.pi
1601  return tx,ty,tz
1602 
1603 # QS scorer
1604 
1605 def _GetChemGroupsMapping(qs_ent_1, qs_ent_2):
1606  """
1607  :return: Inter-complex mapping of chemical groups
1608  (see :attr:`QSscorer.chem_mapping`)
1609 
1610  :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1611  :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1612  """
1613  # get chem. groups and unique representative
1614  chem_groups_1 = qs_ent_1.chem_groups
1615  chem_groups_2 = qs_ent_2.chem_groups
1616  repr_chains_1 = {x[0]: tuple(x) for x in chem_groups_1}
1617  repr_chains_2 = {x[0]: tuple(x) for x in chem_groups_2}
1618 
1619  # if entities do not have different number of unique chains, we get the
1620  # mapping for the smaller set
1621  swapped = False
1622  if len(repr_chains_2) < len(repr_chains_1):
1623  repr_chains_1, repr_chains_2 = repr_chains_2, repr_chains_1
1624  qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
1625  swapped = True
1626 
1627  # find the closest to each chain between the two entities
1628  # NOTE: this may still be sensible to orthology problem
1629  # -> currently we use a global alignment and seq. id. to rank pairs
1630  # -> we also tried local alignments and weighting the seq. id. by the
1631  # coverages of the alignments (gapless string in aln. / seq. length)
1632  # but global aln performed better...
1633  chain_pairs = []
1634  ca_chains_1 = qs_ent_1.ca_chains
1635  ca_chains_2 = qs_ent_2.ca_chains
1636  for ch_1 in repr_chains_1.keys():
1637  for ch_2 in repr_chains_2.keys():
1638  aln = _AlignAtomSeqs(ca_chains_1[ch_1], ca_chains_2[ch_2])
1639  if aln:
1640  chains_seqid = seq.alg.SequenceIdentity(aln)
1641  LogVerbose('Sequence identity', ch_1, ch_2, 'seqid=%.2f' % chains_seqid)
1642  chain_pairs.append((chains_seqid, ch_1, ch_2))
1643 
1644  # get top matching groups first
1645  chain_pairs = sorted(chain_pairs, reverse=True)
1646  chem_mapping = {}
1647  for _, c1, c2 in chain_pairs:
1648  skip = False
1649  for a,b in chem_mapping.iteritems():
1650  if repr_chains_1[c1] == a or repr_chains_2[c2] == b:
1651  skip = True
1652  break
1653  if not skip:
1654  chem_mapping[repr_chains_1[c1]] = repr_chains_2[c2]
1655  if swapped:
1656  chem_mapping = {y: x for x, y in chem_mapping.iteritems()}
1657  qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
1658 
1659  # notify chains without partner
1660  mapped_1 = set([i for s in chem_mapping.keys() for i in s])
1661  chains_1 = set([c.name for c in qs_ent_1.ent.chains])
1662  if chains_1 - mapped_1:
1663  LogWarning('Unmapped Chains in %s: %s'
1664  % (qs_ent_1.GetName(), ','.join(list(chains_1 - mapped_1))))
1665 
1666  mapped_2 = set([i for s in chem_mapping.values() for i in s])
1667  chains_2 = set([c.name for c in qs_ent_2.ent.chains])
1668  if chains_2 - mapped_2:
1669  LogWarning('Unmapped Chains in %s: %s'
1670  % (qs_ent_2.GetName(), ','.join(list(chains_2 - mapped_2))))
1671 
1672  # check if we have any chains left
1673  LogInfo('Chemical chain-groups mapping: ' + str(chem_mapping))
1674  if len(mapped_1) < 1 or len(mapped_2) < 1:
1675  raise QSscoreError('Less than 1 chains left in chem_mapping.')
1676  return chem_mapping
1677 
1678 def _SelectFew(l, max_elements):
1679  """Return l or copy of l with at most *max_elements* entries."""
1680  n_el = len(l)
1681  if n_el <= max_elements:
1682  return l
1683  else:
1684  # cheap integer ceiling (-1 to ensure that x*max_elements gets d_el = x)
1685  d_el = ((n_el - 1) // max_elements) + 1
1686  new_l = list()
1687  for i in range(0, n_el, d_el):
1688  new_l.append(l[i])
1689  return new_l
1690 
1691 def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain,
1692  clustalw_bin):
1693  """
1694  :return: Tuple of two :class:`~ost.mol.EntityView` objects containing subsets
1695  of *qs_ent_1* and *qs_ent_2*. Two entities are later created from
1696  those views (see :attr:`QSscorer.ent_to_cm_1` and
1697  :attr:`QSscorer.ent_to_cm_2`)
1698 
1699  :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1700  :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1701  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1702  :param max_ca_per_chain: See :attr:`QSscorer.max_ca_per_chain_for_cm`
1703  """
1704  # make sure name doesn't contain spaces and is unique
1705  def _FixName(seq_name, seq_names):
1706  # get rid of spaces and make it unique
1707  seq_name = seq_name.replace(' ', '-')
1708  while seq_name in seq_names:
1709  seq_name += '-'
1710  return seq_name
1711  # resulting views into CA entities using CA chain sequences
1712  ent_view_1 = qs_ent_1.ca_entity.CreateEmptyView()
1713  ent_view_2 = qs_ent_2.ca_entity.CreateEmptyView()
1714  ca_chains_1 = qs_ent_1.ca_chains
1715  ca_chains_2 = qs_ent_2.ca_chains
1716  # go through all mapped chemical groups
1717  for group_1, group_2 in chem_mapping.iteritems():
1718  # MSA with ClustalW
1719  seq_list = seq.CreateSequenceList()
1720  # keep sequence-name (must be unique) to view mapping
1721  seq_to_empty_view = dict()
1722  for ch in group_1:
1723  sequence = ca_chains_1[ch].Copy()
1724  sequence.name = _FixName(qs_ent_1.GetName() + '.' + ch, seq_to_empty_view)
1725  seq_to_empty_view[sequence.name] = ent_view_1
1726  seq_list.AddSequence(sequence)
1727  for ch in group_2:
1728  sequence = ca_chains_2[ch].Copy()
1729  sequence.name = _FixName(qs_ent_2.GetName() + '.' + ch, seq_to_empty_view)
1730  seq_to_empty_view[sequence.name] = ent_view_2
1731  seq_list.AddSequence(sequence)
1732  alnc = ClustalW(seq_list, clustalw=clustalw_bin)
1733 
1734  # collect aligned residues (list of lists of sequence_count valid residues)
1735  residue_lists = list()
1736  sequence_count = alnc.sequence_count
1737  for col in alnc:
1738  residues = [col.GetResidue(ir) for ir in range(sequence_count)]
1739  if all([r.IsValid() for r in residues]):
1740  residue_lists.append(residues)
1741  # check number of aligned residues
1742  if len(residue_lists) < 5:
1743  raise QSscoreError('Not enough aligned residues.')
1744  elif max_ca_per_chain > 0:
1745  residue_lists = _SelectFew(residue_lists, max_ca_per_chain)
1746  # check what view is for which residue
1747  res_views = [seq_to_empty_view[alnc.sequences[ir].name] \
1748  for ir in range(sequence_count)]
1749  # add to view (note: only 1 CA atom per residue in here)
1750  for res_list in residue_lists:
1751  for res, view in zip(res_list, res_views):
1752  view.AddResidue(res, mol.ViewAddFlag.INCLUDE_ATOMS)
1753  # done
1754  return ent_view_1, ent_view_2
1755 
1756 
1757 def _FindSymmetry(qs_ent_1, qs_ent_2, ent_to_cm_1, ent_to_cm_2, chem_mapping):
1758  """
1759  :return: A pair of comparable symmetry groups (for :attr:`QSscorer.symm_1`
1760  and :attr:`QSscorer.symm_2`) between the two structures.
1761  Empty lists if no symmetry identified.
1762 
1763  :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1764  :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1765  :param ent_to_cm_1: See :attr:`QSscorer.ent_to_cm_1`
1766  :param ent_to_cm_2: See :attr:`QSscorer.ent_to_cm_2`
1767  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1768  """
1769  LogInfo('Identifying Symmetry Groups...')
1770 
1771  # get possible symmetry groups
1772  symm_subg_1 = _GetSymmetrySubgroups(qs_ent_1, ent_to_cm_1,
1773  chem_mapping.keys())
1774  symm_subg_2 = _GetSymmetrySubgroups(qs_ent_2, ent_to_cm_2,
1775  chem_mapping.values())
1776 
1777  # check combination of groups
1778  LogInfo('Selecting Symmetry Groups...')
1779  # check how many mappings are to be checked for each pair of symmetry groups
1780  # -> lower number here will speed up worst-case runtime of _GetChainMapping
1781  # NOTE: this is tailored to speed up brute force all vs all mapping test
1782  # for preferred _CheckClosedSymmetry this is suboptimal!
1783  best_symm = []
1784  for symm_1, symm_2 in itertools.product(symm_subg_1, symm_subg_2):
1785  s1 = symm_1[0]
1786  s2 = symm_2[0]
1787  if len(s1) != len(s2):
1788  LogDebug('Discarded', str(symm_1), str(symm_2),
1789  ': different size of symmetry groups')
1790  continue
1791  count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
1792  nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
1793  LogDebug('OK', str(symm_1), str(symm_2), ': %s mappings' % nr_mapp)
1794  best_symm.append((nr_mapp, symm_1, symm_2))
1795 
1796  for _, symm_1, symm_2 in sorted(best_symm):
1797  s1 = symm_1[0]
1798  s2 = symm_2[0]
1799  group_1 = ent_to_cm_1.Select('cname=%s' % _FixSelectChainNames(s1))
1800  group_2 = ent_to_cm_2.Select('cname=%s' % _FixSelectChainNames(s2))
1801  # check if by superposing a pair of chains within the symmetry group to
1802  # superpose all chains within the symmetry group
1803  # -> if successful, the symmetry groups are compatible
1804  closed_symm = _CheckClosedSymmetry(group_1, group_2, symm_1, symm_2,
1805  chem_mapping, 6, 0.8, find_best=False)
1806 
1807  if closed_symm:
1808  return symm_1, symm_2
1809 
1810  # nothing found
1811  LogInfo('No coherent symmetry identified between structures')
1812  return [], []
1813 
1814 
1815 def _GetChainMapping(ent_1, ent_2, symm_1, symm_2, chem_mapping,
1816  max_mappings_extensive):
1817  """
1818  :return: Tuple with mapping from *ent_1* to *ent_2* (see
1819  :attr:`QSscorer.chain_mapping`) and scheme used (see
1820  :attr:`QSscorer.chain_mapping_scheme`)
1821 
1822  :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
1823  :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
1824  :param symm_1: See :attr:`QSscorer.symm_1`
1825  :param symm_2: See :attr:`QSscorer.symm_2`
1826  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1827  :param max_mappings_extensive: See :attr:`QSscorer.max_mappings_extensive`
1828  """
1829  LogInfo('Symmetry-groups used in %s: %s' % (ent_1.GetName(), str(symm_1)))
1830  LogInfo('Symmetry-groups used in %s: %s' % (ent_2.GetName(), str(symm_2)))
1831 
1832  # quick check for closed symmetries
1833  thresholds = [( 'strict', 4, 0.8),
1834  ( 'tolerant', 6, 0.4),
1835  ('permissive', 8, 0.2)]
1836  for scheme, sup_thr, sup_fract in thresholds:
1837  # check if by superposing a pair of chains within the symmetry group to
1838  # superpose all chains of the two oligomers
1839  # -> this also returns the resulting mapping of chains
1840  mapping = _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2,
1841  chem_mapping, sup_thr, sup_fract)
1842  if mapping:
1843  LogInfo('Closed Symmetry with %s parameters' % scheme)
1844  if scheme == 'permissive':
1845  LogWarning('Permissive thresholds used for overlap based mapping ' + \
1846  'detection: check mapping manually: %s' % mapping)
1847  return mapping, scheme
1848 
1849  # NOTE that what follows below is sub-optimal:
1850  # - if the two structures don't fit at all, we may map chains rather randomly
1851  # - we may need a lot of operations to find globally "best" mapping
1852  # -> COST = O(N^2) * O(N!)
1853  # (first = all pairwise chain-superpose, latter = all chain mappings)
1854  # -> a greedy chain mapping choice algo (pick best RMSD for each one-by-one)
1855  # could reduce this to O(N^2) * O(N^2) but it would need to be validated
1856  # - one could also try some other heuristic based on center of atoms of chains
1857  # -> at this point we get a bad mapping anyways...
1858 
1859  # if we get here, we will need to check many combinations of mappings
1860  # -> check how many mappings must be checked and limit
1861  count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
1862  LogInfo('Intra Symmetry-group mappings to check: %s' \
1863  % count['intra']['mappings'])
1864  LogInfo('Inter Symmetry-group mappings to check: %s' \
1865  % count['inter']['mappings'])
1866  nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
1867  if nr_mapp > max_mappings_extensive:
1868  raise QSscoreError('Too many possible mappings: %s' % nr_mapp)
1869 
1870  # to speed up the computations we cache chain views and RMSDs
1871  cached_rmsd = _CachedRMSD(ent_1, ent_2)
1872 
1873  # get INTRA symmetry group chain mapping
1874  check = 0
1875  intra_mappings = [] # list of (RMSD, c1, c2, mapping) tuples (best superpose)
1876  # limit chem mapping to reference symmetry group
1877  ref_symm_1 = symm_1[0]
1878  ref_symm_2 = symm_2[0]
1879  asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
1880  # for each chemically identical group
1881  for g1, g2 in asu_chem_mapping.iteritems():
1882  # try to superpose all possible pairs
1883  for c1, c2 in itertools.product(g1, g2):
1884  # get superposition transformation
1885  LogVerbose('Superposing chains: %s, %s' % (c1, c2))
1886  res = cached_rmsd.GetSuperposition(c1, c2)
1887  # compute RMSD of possible mappings
1888  cur_mappings = [] # list of (RMSD, mapping) tuples
1889  for mapping in _ListPossibleMappings(c1, c2, asu_chem_mapping):
1890  check += 1
1891  chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
1892  cur_mappings.append((chains_rmsd, mapping))
1893  LogVerbose(str(mapping), str(chains_rmsd))
1894  best_rmsd, best_mapp = min(cur_mappings)
1895  intra_mappings.append((best_rmsd, c1, c2, best_mapp))
1896  # best chain-chain superposition defines the intra asu mapping
1897  _, intra_asu_c1, intra_asu_c2, intra_asu_mapp = min(intra_mappings)
1898 
1899  # if only one asu is present in any of the complexes, we're done
1900  if len(symm_1) == 1 or len(symm_2) == 1:
1901  mapping = intra_asu_mapp
1902  else:
1903  # the mapping is the element position within the asu chain list
1904  # -> this speed up a lot, assuming that the order of chain in asu groups
1905  # is following the order of symmetry operations
1906  index_asu_c1 = ref_symm_1.index(intra_asu_c1)
1907  index_asu_c2 = ref_symm_2.index(intra_asu_c2)
1908  index_mapp = {ref_symm_1.index(s1): ref_symm_2.index(s2) \
1909  for s1, s2 in intra_asu_mapp.iteritems()}
1910  LogInfo('Intra symmetry-group mapping: %s' % str(intra_asu_mapp))
1911 
1912  # get INTER symmetry group chain mapping
1913  # we take the first symmetry group from the complex having the most
1914  if len(symm_1) < len(symm_2):
1915  symm_combinations = list(itertools.product(symm_1, [symm_2[0]]))
1916  else:
1917  symm_combinations = list(itertools.product([symm_1[0]], symm_2))
1918 
1919  full_asu_mapp = {tuple(symm_1): tuple(symm_2)}
1920  full_mappings = [] # list of (RMSD, mapping) tuples
1921  for s1, s2 in symm_combinations:
1922  # get superposition transformation (we take best chain-pair in asu)
1923  LogVerbose('Superposing symmetry-group: %s, %s in %s, %s' \
1924  % (s1[index_asu_c1], s2[index_asu_c2], s1, s2))
1925  res = cached_rmsd.GetSuperposition(s1[index_asu_c1], s2[index_asu_c2])
1926  # compute RMSD of possible mappings
1927  for asu_mapp in _ListPossibleMappings(s1, s2, full_asu_mapp):
1928  check += 1
1929  # need to extract full chain mapping (use indexing)
1930  mapping = {}
1931  for a, b in asu_mapp.iteritems():
1932  for id_a, id_b in index_mapp.iteritems():
1933  mapping[a[id_a]] = b[id_b]
1934  chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
1935  full_mappings.append((chains_rmsd, mapping))
1936  LogVerbose(str(mapping), str(chains_rmsd))
1937 
1938  if check != nr_mapp:
1939  LogWarning('Mapping number estimation was wrong') # sanity check
1940 
1941  # return best (lowest RMSD) mapping
1942  mapping = min(full_mappings)[1]
1943 
1944  LogWarning('Extensive search used for mapping detection (fallback). This ' + \
1945  'approach has known limitations. Check mapping manually: %s' \
1946  % mapping)
1947  return mapping, 'extensive'
1948 
1949 
1950 def _GetSymmetrySubgroups(qs_ent, ent, chem_groups):
1951  """
1952  Identify the symmetry (either cyclic C or dihedral D) of the protein and find
1953  all possible symmetry subgroups. This is done testing all combination of chain
1954  superposition and clustering them by the angles (D) or the axis (C) of the Rt
1955  operator.
1956 
1957  Groups of superposition which can fully reconstruct the structure are possible
1958  symmetry subgroups.
1959 
1960  :param qs_ent: Entity with cached angles and axis.
1961  :type qs_ent: :class:`QSscoreEntity`
1962  :param ent: Entity from which to extract chains (perfect alignment assumed
1963  for superposition as in :attr:`QSscorer.ent_to_cm_1`).
1964  :type ent: :class:`EntityHandle` or :class:`EntityView`
1965  :param chem_groups: List of tuples/lists of chain names in *ent*. Each list
1966  contains all chains belonging to a chem. group (could be
1967  from :attr:`QSscoreEntity.chem_groups` or from "keys()"
1968  or "values()" of :attr:`QSscorer.chem_mapping`)
1969 
1970  :return: A list of possible symmetry subgroups (each in same format as
1971  :attr:`QSscorer.symm_1`). If no symmetry is found, we return a list
1972  with a single symmetry subgroup with a single group with all chains.
1973  """
1974  # get angles / axis for pairwise transformations within same chem. group
1975  angles = {}
1976  axis = {}
1977  for cnames in chem_groups:
1978  for c1, c2 in itertools.combinations(cnames, 2):
1979  cur_angles = qs_ent.GetAngles(c1, c2)
1980  if not np.any(np.isnan(cur_angles)):
1981  angles[(c1,c2)] = cur_angles
1982  cur_axis = qs_ent.GetAxis(c1, c2)
1983  if not np.any(np.isnan(cur_axis)):
1984  axis[(c1,c2)] = cur_axis
1985 
1986  # cluster superpositions angles at different thresholds
1987  symm_groups = []
1988  LogVerbose('Possible symmetry-groups for: %s' % ent.GetName())
1989  for angle_thr in np.arange(0.1, 1, 0.1):
1990  d_symm_groups = _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr)
1991  if d_symm_groups:
1992  for group in d_symm_groups:
1993  if group not in symm_groups:
1994  symm_groups.append(group)
1995  LogVerbose('Dihedral: %s' % group)
1996  LogInfo('Symmetry threshold %.1f used for angles of %s' \
1997  % (angle_thr, ent.GetName()))
1998  break
1999 
2000  # cluster superpositions axis at different thresholds
2001  for axis_thr in np.arange(0.1, 1, 0.1):
2002  c_symm_groups = _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr)
2003  if c_symm_groups:
2004  for group in c_symm_groups:
2005  if group not in symm_groups:
2006  symm_groups.append(group)
2007  LogVerbose('Cyclic: %s' % group)
2008  LogInfo('Symmetry threshold %.1f used for axis of %s' \
2009  % (axis_thr, ent.GetName()))
2010  break
2011 
2012  # fall back to single "group" containing all chains if none found
2013  if not symm_groups:
2014  LogInfo('No symmetry-group detected in %s' % ent.GetName())
2015  symm_groups = [[tuple([c for g in chem_groups for c in g])]]
2016 
2017  return symm_groups
2018 
2019 def _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr):
2020  """
2021  :return: Dihedral subgroups for :func:`_GetSymmetrySubgroups`
2022  (same return type as there). Empty list if fail.
2023 
2024  :param ent: See :func:`_GetSymmetrySubgroups`
2025  :param chem_groups: See :func:`_GetSymmetrySubgroups`
2026  :param angles: :class:`dict` (key = chain-pair-tuple, value = Euler angles)
2027  :param angle_thr: Euler angles distance threshold for clustering (float).
2028  """
2029  # cluster superpositions angles
2030  if len(angles) == 0:
2031  # nothing to be done here
2032  return []
2033  else:
2034  same_angles = _ClusterData(angles, angle_thr, _AngleArrayDistance)
2035 
2036  # get those which are non redundant and covering all chains
2037  symm_groups = []
2038  for clst in same_angles.values():
2039  group = clst.keys()
2040  if _ValidChainGroup(group, ent):
2041  if len(chem_groups) > 1:
2042  # if hetero, we want to group toghether different chains only
2043  symm_groups.append(zip(*group))
2044  else:
2045  # if homo, we also want pairs
2046  symm_groups.append(group)
2047  symm_groups.append(zip(*group))
2048  return symm_groups
2049 
2050 def _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr):
2051  """
2052  :return: Cyclic subgroups for :func:`_GetSymmetrySubgroups`
2053  (same return type as there). Empty list if fail.
2054 
2055  :param ent: See :func:`_GetSymmetrySubgroups`
2056  :param chem_groups: See :func:`_GetSymmetrySubgroups`
2057  :param angles: :class:`dict` (key = chain-pair-tuple, value = rotation axis)
2058  :param angle_thr: Axis distance threshold for clustering (float).
2059  """
2060  # cluster superpositions axis
2061  if len(axis) == 0:
2062  # nothing to be done here
2063  return []
2064  else:
2065  same_axis = _ClusterData(axis, axis_thr, _AxisDistance)
2066 
2067  # use to get grouping
2068  symm_groups = []
2069  for clst in same_axis.values():
2070  all_chain = [item for sublist in clst.keys() for item in sublist]
2071  if len(set(all_chain)) == ent.chain_count:
2072  # if we have an hetero we can exploit cyclic symmetry for grouping
2073  if len(chem_groups) > 1:
2074  ref_chem_group = chem_groups[0]
2075  # try two criteria for grouping
2076  cyclic_group_closest = []
2077  cyclic_group_iface = []
2078  for c1 in ref_chem_group:
2079  group_closest = [c1]
2080  group_iface = [c1]
2081  for chains in chem_groups[1:]:
2082  # center of atoms distance
2083  closest = _GetClosestChain(ent, c1, chains)
2084  # chain with bigger interface
2085  closest_iface = _GetClosestChainInterface(ent, c1, chains)
2086  group_closest.append(closest)
2087  group_iface.append(closest_iface)
2088  cyclic_group_closest.append(tuple(group_closest))
2089  cyclic_group_iface.append(tuple(group_iface))
2090  if _ValidChainGroup(cyclic_group_closest, ent):
2091  symm_groups.append(cyclic_group_closest)
2092  if _ValidChainGroup(cyclic_group_iface, ent):
2093  symm_groups.append(cyclic_group_iface)
2094  # if it is a homo, there's not much we can group
2095  else:
2096  symm_groups.append(chem_groups)
2097  return symm_groups
2098 
2099 def _ClusterData(data, thr, metric):
2100  """Wrapper for fclusterdata to get dict of clusters.
2101 
2102  :param data: :class:`dict` (keys for ID, values used for clustering)
2103  :return: :class:`dict` {cluster_idx: {data-key: data-value}}
2104  """
2105  # special case length 1
2106  if len(data) == 1:
2107  return {0: {data.keys()[0]: data.values()[0]} }
2108  # do the clustering
2109  cluster_indices = fclusterdata(np.asarray(data.values()), thr,
2110  method='complete', criterion='distance',
2111  metric=metric)
2112  # fclusterdata output is cluster ids -> put into dict of clusters
2113  res = {}
2114  for cluster_idx, data_key in zip(cluster_indices, data.keys()):
2115  if not cluster_idx in res:
2116  res[cluster_idx] = {}
2117  res[cluster_idx][data_key] = data[data_key]
2118  return res
2119 
2120 def _AngleArrayDistance(u, v):
2121  """
2122  :return: Average angular distance of two arrays of angles.
2123  :param u: Euler angles.
2124  :param v: Euler angles.
2125  """
2126  dist = []
2127  for a,b in zip(u,v):
2128  delta = abs(a-b)
2129  if delta > np.pi:
2130  delta = abs(2*np.pi - delta)
2131  dist.append(delta)
2132  return np.mean(dist)
2133 
2134 def _AxisDistance(u, v):
2135  """
2136  :return: Euclidean distance between two rotation axes. Axes can point in
2137  either direction so we ensure to use the closer one.
2138  :param u: Rotation axis.
2139  :param v: Rotation axis.
2140  """
2141  # get axes as arrays (for convenience)
2142  u = np.asarray(u)
2143  v = np.asarray(v)
2144  # get shorter of two possible distances (using v or -v)
2145  dv1 = u - v
2146  dv2 = u + v
2147  d1 = np.dot(dv1, dv1)
2148  d2 = np.dot(dv2, dv2)
2149  return np.sqrt(min(d1, d2))
2150 
2151 def _GetClosestChain(ent, ref_chain, chains):
2152  """
2153  :return: Chain closest to *ref_chain* based on center of atoms distance.
2154  :rtype: :class:`str`
2155  :param ent: See :func:`_GetSymmetrySubgroups`
2156  :param ref_chain: We look for chains closest to this one
2157  :type ref_chain: :class:`str`
2158  :param chains: We only consider these chains
2159  :type chains: :class:`list` of :class:`str`
2160  """
2161  contacts = []
2162  ref_center = ent.FindChain(ref_chain).center_of_atoms
2163  for ch in chains:
2164  center = ent.FindChain(ch).center_of_atoms
2165  contacts.append((geom.Distance(ref_center, center), ch))
2166  closest_chain = min(contacts)[1]
2167  return closest_chain
2168 
2169 def _GetClosestChainInterface(ent, ref_chain, chains):
2170  """
2171  :return: Chain with biggest interface (within 10 A) to *ref_chain*.
2172  :rtype: :class:`str`
2173  :param ent: See :func:`_GetSymmetrySubgroups`
2174  :param ref_chain: We look for chains closest to this one
2175  :type ref_chain: :class:`str`
2176  :param chains: We only consider these chains
2177  :type chains: :class:`list` of :class:`str`
2178  """
2179  # NOTE: this is computed on a subset of the full biounit and might be
2180  # inaccurate. Also it could be extracted from QSscoreEntity.contacts.
2181  closest = []
2182  for ch in chains:
2183  iface_view = ent.Select('cname="%s" and 10 <> [cname="%s"]' \
2184  % (ref_chain, ch))
2185  nr_res = iface_view.residue_count
2186  closest.append((nr_res, ch))
2187  closest_chain = max(closest)[1]
2188  return closest_chain
2189 
2190 def _ValidChainGroup(group, ent):
2191  """
2192  :return: True, if *group* has unique chain names and as many chains as *ent*
2193  :rtype: :class:`bool`
2194  :param group: Symmetry groups to check
2195  :type group: Same as :attr:`QSscorer.symm_1`
2196  :param ent: See :func:`_GetSymmetrySubgroups`
2197  """
2198  all_chain = [item for sublist in group for item in sublist]
2199  if len(all_chain) == len(set(all_chain)) and\
2200  len(all_chain) == ent.chain_count:
2201  return True
2202  return False
2203 
2204 def _LimitChemMapping(chem_mapping, limit_1, limit_2):
2205  """
2206  :return: Chem. mapping containing only chains in *limit_1* and *limit_2*
2207  :rtype: Same as :attr:`QSscorer.chem_mapping`
2208  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
2209  :param limit_1: Limits chain names in chem_mapping.keys()
2210  :type limit_1: List/tuple of strings
2211  :param limit_2: Limits chain names in chem_mapping.values()
2212  :type limit_2: List/tuple of strings
2213  """
2214  # use set intersection to keep only mapping for chains in limit_X
2215  limit_1_set = set(limit_1)
2216  limit_2_set = set(limit_2)
2217  asu_chem_mapping = dict()
2218  for key, value in chem_mapping.iteritems():
2219  new_key = tuple(set(key) & limit_1_set)
2220  if new_key:
2221  asu_chem_mapping[new_key] = tuple(set(value) & limit_2_set)
2222  return asu_chem_mapping
2223 
2224 
2225 def _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping):
2226  """
2227  :return: Dictionary of number of mappings and superpositions to be performed.
2228  Returned as *result[X][Y] = number* with X = "intra" or "inter" and
2229  Y = "mappings" or "superpositions". The idea is that for each
2230  pairwise superposition we check all possible mappings.
2231  We can check the combinations within (intra) a symmetry group and
2232  once established, we check the combinations between different (inter)
2233  symmetry groups.
2234  :param symm_1: See :attr:`QSscorer.symm_1`
2235  :param symm_2: See :attr:`QSscorer.symm_2`
2236  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
2237  """
2238  # setup
2239  c = {}
2240  c['intra'] = {}
2241  c['inter'] = {}
2242  c['intra']['mappings'] = 0
2243  c['intra']['superpositions'] = 0
2244  c['inter']['mappings'] = 0
2245  c['inter']['superpositions'] = 0
2246  # intra ASU mappings within reference symmetry groups
2247  ref_symm_1 = symm_1[0]
2248  ref_symm_2 = symm_2[0]
2249  asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
2250  for g1, g2 in asu_chem_mapping.iteritems():
2251  chain_superpositions = len(g1) * len(g2)
2252  c['intra']['superpositions'] += chain_superpositions
2253  map_per_sup = _PermutationOrCombinations(g1[0], g2[0], asu_chem_mapping)
2254  c['intra']['mappings'] += chain_superpositions * map_per_sup
2255  if len(symm_1) == 1 or len(symm_2) == 1:
2256  return c
2257  # inter ASU mappings
2258  asu_superposition = min(len(symm_1), len(symm_2))
2259  c['inter']['superpositions'] = asu_superposition
2260  asu = {tuple(symm_1): tuple(symm_2)}
2261  map_per_sup = _PermutationOrCombinations(ref_symm_1, ref_symm_2, asu)
2262  c['inter']['mappings'] = asu_superposition * map_per_sup
2263  return c
2264 
2265 def _PermutationOrCombinations(sup1, sup2, chem_mapping):
2266  """Should match len(_ListPossibleMappings(sup1, sup2, chem_mapping))."""
2267  # remove superposed elements, put smallest complex as key
2268  chem_map = {}
2269  for a,b in chem_mapping.iteritems():
2270  new_a = tuple([x for x in a if x != sup1])
2271  new_b = tuple([x for x in b if x != sup2])
2272  chem_map[new_a] = new_b
2273  mapp_nr = 1.0
2274  for a, b in chem_map.iteritems():
2275  if len(a) == len(b):
2276  mapp_nr *= factorial(len(a))
2277  elif len(a) < len(b):
2278  mapp_nr *= binom(len(b), len(a))
2279  elif len(a) > len(b):
2280  mapp_nr *= binom(len(a), len(b))
2281  return int(mapp_nr)
2282 
2283 def _ListPossibleMappings(sup1, sup2, chem_mapping):
2284  """
2285  Return a flat list of all possible mappings given *chem_mapping* and keeping
2286  mapping of *sup1* and *sup2* fixed. For instance if elements are chain names
2287  this is all the mappings to check for a given superposition.
2288 
2289  Elements in first complex are defined by *chem_mapping.keys()* (list of list
2290  of elements) and elements in second complex by *chem_mapping.values()*. If
2291  complexes don't have same number of elements, we map only elements for the one
2292  with less. Also mapping is only between elements of mapped groups according to
2293  *chem_mapping*.
2294 
2295  :rtype: :class:`list` of :class:`dict` (key = element in chem_mapping-key,
2296  value = element in chem_mapping-value)
2297  :param sup1: Element for which mapping is fixed.
2298  :type sup1: Like element in chem_mapping-key
2299  :param sup2: Element for which mapping is fixed.
2300  :type sup2: Like element in chem_mapping-value
2301  :param chem_mapping: Defines mapping between groups of elements (e.g. result
2302  of :func:`_LimitChemMapping`).
2303  :type chem_mapping: :class:`dict` with key / value = :class:`tuple`
2304 
2305  :raises: :class:`QSscoreError` if reference complex (first one or one with
2306  less elements) has more elements for any given mapped group.
2307  """
2308  # find smallest complex
2309  chain1 = [i for s in chem_mapping.keys() for i in s]
2310  chain2 = [i for s in chem_mapping.values() for i in s]
2311  swap = False
2312  if len(chain1) > len(chain2):
2313  swap = True
2314  # remove superposed elements, put smallest complex as key
2315  chem_map = {}
2316  for a, b in chem_mapping.iteritems():
2317  new_a = tuple([x for x in a if x != sup1])
2318  new_b = tuple([x for x in b if x != sup2])
2319  if swap:
2320  chem_map[new_b] = new_a
2321  else:
2322  chem_map[new_a] = new_b
2323  # generate permutations or combinations of chemically
2324  # equivalent chains
2325  chem_perm = []
2326  chem_ref = []
2327  for a, b in chem_map.iteritems():
2328  if len(a) == len(b):
2329  chem_perm.append(list(itertools.permutations(b)))
2330  chem_ref.append(a)
2331  elif len(a) < len(b):
2332  chem_perm.append(list(itertools.combinations(b, len(a))))
2333  chem_ref.append(a)
2334  else:
2335  raise QSscoreError('Impossible to define reference group: %s' \
2336  % str(chem_map))
2337  # save the list of possible mappings
2338  mappings = []
2339  flat_ref = [i for s in chem_ref for i in s]
2340  for perm in itertools.product(*chem_perm):
2341  flat_perm = [i for s in perm for i in s]
2342  d = {c1: c2 for c1, c2 in zip(flat_ref, flat_perm)}
2343  if swap:
2344  d = {v: k for k, v in d.items()}
2345  d.update({sup1: sup2})
2346  mappings.append(d)
2347  return mappings
2348 
2349 
2350 def _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2, chem_mapping,
2351  sup_thr=4, sup_fract=0.8, find_best=True):
2352  """
2353  Quick check if we can superpose two chains and get a mapping for all other
2354  chains using the same transformation. The mapping is defined by sufficient
2355  overlap of the transformed chain of *ent_1* onto another chain in *ent_2*.
2356 
2357  :param ent_1: Entity to map to *ent_2* (perfect alignment assumed between
2358  chains of same chem. group as in :attr:`QSscorer.ent_to_cm_1`).
2359  Views are ok but only to select full chains!
2360  :param ent_2: Entity to map to (perfect alignment assumed between
2361  chains of same chem. group as in :attr:`QSscorer.ent_to_cm_2`).
2362  Views are ok but only to select full chains!
2363  :param symm_1: Symmetry groups to use. We only superpose chains within
2364  reference symmetry group of *symm_1* and *symm_2*.
2365  See :attr:`QSscorer.symm_1`
2366  :param symm_2: See :attr:`QSscorer.symm_2`
2367  :param chem_mapping: See :attr:`QSscorer.chem_mapping`.
2368  All chains in *ent_1* / *ent_2* must be listed here!
2369  :param sup_thr: Distance around transformed chain in *ent_1* to check for
2370  overlap.
2371  :type sup_thr: :class:`float`
2372  :param sup_fract: Fraction of atoms in chain of *ent_2* that must be
2373  overlapped for overlap to be sufficient.
2374  :type sup_fract: :class:`float`
2375  :param find_best: If True, we look for best mapping according to
2376  :func:`_GetMappedRMSD`. Otherwise, we return first suitable
2377  mapping.
2378  :type find_best: :class:`bool`
2379 
2380  :return: Mapping from *ent_1* to *ent_2* or None if none found. Mapping, if
2381  found, is as in :attr:`QSscorer.chain_mapping`.
2382  :rtype: :class:`dict` (:class:`str` / :class:`str`)
2383  """
2384  # limit chem mapping to reference symmetry group
2385  ref_symm_1 = symm_1[0]
2386  ref_symm_2 = symm_2[0]
2387  asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
2388  # for each chemically identical group
2389  rmsd_mappings = []
2390  for g1, g2 in asu_chem_mapping.iteritems():
2391  # try to superpose all possible pairs
2392  # -> note that some chain-chain combinations may work better than others
2393  # to superpose the full oligomer (e.g. if some chains are open/closed)
2394  for c1, c2 in itertools.product(g1, g2):
2395  # get superposition transformation
2396  chain_1 = ent_1.Select('cname="%s"' % c1)
2397  chain_2 = ent_2.Select('cname="%s"' % c2)
2398  res = mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
2399  # look for overlaps
2400  mapping = _GetSuperpositionMapping(ent_1, ent_2, chem_mapping,
2401  res.transformation, sup_thr,
2402  sup_fract)
2403  if not mapping:
2404  continue
2405  # early abort if we only want the first one
2406  if not find_best:
2407  return mapping
2408  else:
2409  # get RMSD for sorting
2410  rmsd = _GetMappedRMSD(ent_1, ent_2, mapping, res.transformation)
2411  rmsd_mappings.append((rmsd, mapping))
2412  # return best mapping
2413  if rmsd_mappings:
2414  return min(rmsd_mappings)[1]
2415  else:
2416  return None
2417 
2418 def _GetSuperpositionMapping(ent_1, ent_2, chem_mapping, transformation,
2419  sup_thr, sup_fract):
2420  """
2421  :return: Dict with chain mapping from *ent_1* to *ent_2* or None if failed
2422  (see :func:`_CheckClosedSymmetry`).
2423  :param ent_1: See :func:`_CheckClosedSymmetry`
2424  :param ent_2: See :func:`_CheckClosedSymmetry`
2425  :param chem_mapping: See :func:`_CheckClosedSymmetry`
2426  :param transformation: Superposition transformation to be applied to *ent_1*.
2427  :param sup_thr: See :func:`_CheckClosedSymmetry`
2428  :param sup_fract: See :func:`_CheckClosedSymmetry`
2429  """
2430  # NOTE: this seems to be the comput. most expensive part in QS scoring
2431  # -> it could be moved to C++ for speed-up
2432  # -> the next expensive bits are ClustalW and GetContacts btw...
2433 
2434  # swap if needed (ent_1 must be shorter or same)
2435  if ent_1.chain_count > ent_2.chain_count:
2436  swap = True
2437  ent_1, ent_2 = ent_2, ent_1
2438  transformation = geom.Invert(transformation)
2439  chem_pairs = zip(chem_mapping.values(), chem_mapping.keys())
2440  else:
2441  swap = False
2442  chem_pairs = chem_mapping.iteritems()
2443  # sanity check
2444  if ent_1.chain_count == 0:
2445  return None
2446  # extract valid partners for each chain in ent_1
2447  chem_partners = dict()
2448  for cg1, cg2 in chem_pairs:
2449  for ch in cg1:
2450  chem_partners[ch] = set(cg2)
2451  # get mapping for each chain in ent_1
2452  mapping = dict()
2453  mapped_chains = set()
2454  for ch_1 in ent_1.chains:
2455  # get all neighbors split by chain (NOTE: this muight be moved to C++)
2456  ch_atoms = {ch_2.name: set() for ch_2 in ent_2.chains}
2457  for a_1 in ch_1.handle.atoms:
2458  transformed_pos = geom.Vec3(transformation * geom.Vec4(a_1.pos))
2459  a_within = ent_2.FindWithin(transformed_pos, sup_thr)
2460  for a_2 in a_within:
2461  ch_atoms[a_2.chain.name].add(a_2.hash_code)
2462  # get one with most atoms in overlap
2463  max_num, max_name = max((len(atoms), name)
2464  for name, atoms in ch_atoms.iteritems())
2465  # early abort if overlap fraction not good enough
2466  ch_2 = ent_2.FindChain(max_name)
2467  if max_num == 0 or max_num / float(ch_2.handle.atom_count) < sup_fract:
2468  return None
2469  # early abort if mapped to chain of different chem. group
2470  ch_1_name = ch_1.name
2471  if ch_1_name not in chem_partners:
2472  raise RuntimeError("Chem. mapping doesn't contain all chains!")
2473  if max_name not in chem_partners[ch_1_name]:
2474  return None
2475  # early abort if multiple ones mapped to same chain
2476  if max_name in mapped_chains:
2477  return None
2478  # set mapping
2479  mapping[ch_1_name] = max_name
2480  mapped_chains.add(max_name)
2481  # unswap if needed and return
2482  if swap:
2483  mapping = {v: k for k, v in mapping.iteritems()}
2484  return mapping
2485 
2486 def _GetMappedRMSD(ent_1, ent_2, chain_mapping, transformation):
2487  """
2488  :return: RMSD between complexes considering chain mapping.
2489  :param ent_1: Entity mapped to *ent_2* (perfect alignment assumed between
2490  mapped chains as in :attr:`QSscorer.ent_to_cm_1`).
2491  :param ent_2: Entity which was mapped to (perfect alignment assumed between
2492  mapped chains as in :attr:`QSscorer.ent_to_cm_2`).
2493  :param chain_mapping: See :attr:`QSscorer.chain_mapping`
2494  :param transformation: Superposition transformation to be applied to *ent_1*.
2495  """
2496  # collect RMSDs and atom counts chain by chain and combine afterwards
2497  rmsds = []
2498  atoms = []
2499  for c1, c2 in chain_mapping.iteritems():
2500  # get views and atom counts
2501  chain_1 = ent_1.Select('cname="%s"' % c1)
2502  chain_2 = ent_2.Select('cname="%s"' % c2)
2503  atom_count = chain_1.atom_count
2504  if atom_count != chain_2.atom_count:
2505  raise RuntimeError('Chains in _GetMappedRMSD must be perfectly aligned!')
2506  # get RMSD
2507  rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
2508  # update lists
2509  rmsds.append(rmsd)
2510  atoms.append(float(atom_count))
2511  # combine (reminder: RMSD = sqrt(sum(atom_distance^2)/atom_count))
2512  rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
2513  return rmsd
2514 
2516  """Helper object for repetitive RMSD calculations.
2517  Meant to speed up :func:`_GetChainMapping` but could also be used to replace
2518  :func:`_GetMappedRMSD` in :func:`_CheckClosedSymmetry`.
2519 
2520  :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
2521  :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
2522  """
2523  def __init__(self, ent_1, ent_2):
2524  # initialize caches and keep entities
2525  self.ent_1 = ent_1
2526  self.ent_2 = ent_2
2527  self._chain_views_1 = dict()
2528  self._chain_views_2 = dict()
2529  self._pair_rmsd = dict() # key = (c1,c2), value = (atom_count,rmsd)
2530 
2531  def GetChainView1(self, cname):
2532  """Get cached view on chain *cname* for :attr:`ent_1`."""
2533  if cname not in self._chain_views_1:
2534  self._chain_views_1[cname] = self.ent_1.Select('cname="%s"' % cname)
2535  return self._chain_views_1[cname]
2536 
2537  def GetChainView2(self, cname):
2538  """Get cached view on chain *cname* for :attr:`ent_2`."""
2539  if cname not in self._chain_views_2:
2540  self._chain_views_2[cname] = self.ent_2.Select('cname="%s"' % cname)
2541  return self._chain_views_2[cname]
2542 
2543  def GetSuperposition(self, c1, c2):
2544  """Get superposition result (no change in entities!) for *c1* to *c2*.
2545  This invalidates cached RMSD results used in :func:`GetMappedRMSD`.
2546 
2547  :param c1: chain name for :attr:`ent_1`.
2548  :param c2: chain name for :attr:`ent_2`.
2549  """
2550  # invalidate _pair_rmsd
2551  self._pair_rmsd = dict()
2552  # superpose
2553  chain_1 = self.GetChainView1(c1)
2554  chain_2 = self.GetChainView2(c2)
2555  if chain_1.atom_count != chain_2.atom_count:
2556  raise RuntimeError('Chains in GetSuperposition not perfectly aligned!')
2557  return mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
2558 
2559  def GetMappedRMSD(self, chain_mapping, transformation):
2560  """
2561  :return: RMSD between complexes considering chain mapping.
2562  :param chain_mapping: See :attr:`QSscorer.chain_mapping`.
2563  :param transformation: Superposition transformation (e.g. res.transformation
2564  for res = :func:`GetSuperposition`).
2565  """
2566  # collect RMSDs and atom counts chain by chain and combine afterwards
2567  rmsds = []
2568  atoms = []
2569  for c1, c2 in chain_mapping.iteritems():
2570  # cached?
2571  if (c1, c2) in self._pair_rmsd:
2572  atom_count, rmsd = self._pair_rmsd[(c1, c2)]
2573  else:
2574  # get views and atom counts
2575  chain_1 = self.GetChainView1(c1)
2576  chain_2 = self.GetChainView2(c2)
2577  atom_count = chain_1.atom_count
2578  if atom_count != chain_2.atom_count:
2579  raise RuntimeError('Chains in GetMappedRMSD not perfectly aligned!')
2580  # get RMSD
2581  rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
2582  self._pair_rmsd[(c1, c2)] = (atom_count, rmsd)
2583  # update lists
2584  rmsds.append(rmsd)
2585  atoms.append(float(atom_count))
2586  # combine (reminder: RMSD = sqrt(sum(atom_distance^2)/atom_count))
2587  rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
2588  return rmsd
2589 
2590 
2591 def _CleanUserSymmetry(symm, ent):
2592  """Clean-up user provided symmetry.
2593 
2594  :param symm: Symmetry group as in :attr:`QSscorer.symm_1`
2595  :param ent: Entity from which to extract chain names
2596  :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
2597  :return: New symmetry group limited to chains in sub-structure *ent*
2598  (see :attr:`QSscorer.symm_1`). Empty list if invalid symmetry.
2599  """
2600  # restrict symm to only contain chains in ent
2601  chain_names = set([ch.name for ch in ent.chains])
2602  new_symm = list()
2603  for symm_group in symm:
2604  new_group = tuple(ch for ch in symm_group if ch in chain_names)
2605  if new_group:
2606  new_symm.append(new_group)
2607  # report it
2608  if new_symm != symm:
2609  LogInfo("Cleaned user symmetry for %s: %s" % (ent.GetName(), new_symm))
2610  # do all groups have same length?
2611  lengths = [len(symm_group) for symm_group in new_symm]
2612  if lengths[1:] != lengths[:-1]:
2613  LogWarning('User symmetry group of different sizes for %s. Ignoring it!' \
2614  % (ent.GetName()))
2615  return []
2616  # do we cover all chains?
2617  s_chain_names = set([ch for symm_group in new_symm for ch in symm_group])
2618  if len(s_chain_names) != sum(lengths):
2619  LogWarning('User symmetry group for %s has duplicate chains. Ignoring it!' \
2620  % (ent.GetName()))
2621  return []
2622  if s_chain_names != chain_names:
2623  LogWarning('User symmetry group for %s is missing chains. Ignoring it!' \
2624  % (ent.GetName()))
2625  return []
2626  # ok all good
2627  return new_symm
2628 
2629 def _AreValidSymmetries(symm_1, symm_2):
2630  """Check symmetry pair for major problems.
2631 
2632  :return: False if any of the two is empty or if they're compatible in size.
2633  :rtype: :class:`bool`
2634  """
2635  if not symm_1 or not symm_2:
2636  return False
2637  if len(symm_1) != 1 or len(symm_2) != 1:
2638  if not len(symm_1[0]) == len(symm_2[0]):
2639  LogWarning('Symmetry groups of different sizes. Ignoring them!')
2640  return False
2641  return True
2642 
2643 def _GetMappedAlignments(ent_1, ent_2, chain_mapping, res_num_alignment):
2644  """
2645  :return: Alignments of 2 structures given chain mapping
2646  (see :attr:`QSscorer.alignments`).
2647  :param ent_1: Entity containing all chains in *chain_mapping.keys()*.
2648  Views to this entity attached to first sequence of each aln.
2649  :param ent_2: Entity containing all chains in *chain_mapping.values()*.
2650  Views to this entity attached to second sequence of each aln.
2651  :param chain_mapping: See :attr:`QSscorer.chain_mapping`
2652  :param res_num_alignment: See :attr:`QSscorer.res_num_alignment`
2653  """
2654  alns = list()
2655  for ch_1_name in sorted(chain_mapping):
2656  # get both sequences incl. attached view
2657  ch_1 = ent_1.FindChain(ch_1_name)
2658  ch_2 = ent_2.FindChain(chain_mapping[ch_1_name])
2659  if res_num_alignment:
2660  max_res_num = max([r.number.GetNum() for r in ch_1.residues] +
2661  [r.number.GetNum() for r in ch_2.residues])
2662  ch1_aln = ["-"] * max_res_num
2663  ch2_aln = ["-"] * max_res_num
2664  for res in ch_1.residues:
2665  ch1_aln[res.number.GetNum() - 1] = res.GetOneLetterCode()
2666  ch1_aln = "".join(ch1_aln)
2667  seq_1 = seq.CreateSequence(ch_1.name, str(ch1_aln))
2668  seq_1.AttachView(ch_1.Select(""))
2669  for res in ch_2.residues:
2670  ch2_aln[res.number.GetNum() - 1] = res.GetOneLetterCode()
2671  ch2_aln = "".join(ch2_aln)
2672  seq_2 = seq.CreateSequence(ch_2.name, str(ch2_aln))
2673  seq_2.AttachView(ch_2.Select(""))
2674  # Create alignment
2675  aln = seq.CreateAlignment()
2676  aln.AddSequence(seq_1)
2677  aln.AddSequence(seq_2)
2678  else:
2679  seq_1 = seq.SequenceFromChain(ch_1.name, ch_1)
2680  seq_2 = seq.SequenceFromChain(ch_2.name, ch_2)
2681  # align them
2682  aln = _AlignAtomSeqs(seq_1, seq_2)
2683  if aln:
2684  alns.append(aln)
2685  return alns
2686 
2687 def _GetMappedResidues(alns):
2688  """
2689  :return: Mapping of shared residues in *alns* (with views attached)
2690  (see :attr:`QSscorer.mapped_residues`).
2691  :param alns: See :attr:`QSscorer.alignments`
2692  """
2693  mapped_residues = dict()
2694  for aln in alns:
2695  # prepare dict for c1
2696  c1 = aln.GetSequence(0).name
2697  mapped_residues[c1] = dict()
2698  # get shared residues
2699  v1, v2 = seq.ViewsFromAlignment(aln)
2700  for res_1, res_2 in zip(v1.residues, v2.residues):
2701  r1 = res_1.number.num
2702  r2 = res_2.number.num
2703  mapped_residues[c1][r1] = r2
2704 
2705  return mapped_residues
2706 
2707 def _GetExtraWeights(contacts, done, mapped_residues):
2708  """Return sum of extra weights for contacts of chains in set and not in done.
2709  :return: Tuple (weight_extra_mapped, weight_extra_all).
2710  weight_extra_mapped only sums if both cX,rX in mapped_residues
2711  weight_extra_all sums all
2712  :param contacts: See :func:`GetContacts` for first entity
2713  :param done: List of (c1, c2, r1, r2) tuples to ignore
2714  :param mapped_residues: See :func:`_GetMappedResidues`
2715  """
2716  weight_extra_mapped = 0
2717  weight_extra_non_mapped = 0
2718  for c1 in contacts:
2719  for c2 in contacts[c1]:
2720  for r1 in contacts[c1][c2]:
2721  for r2 in contacts[c1][c2][r1]:
2722  if (c1, c2, r1, r2) not in done:
2723  weight = _weight(contacts[c1][c2][r1][r2])
2724  if c1 in mapped_residues and r1 in mapped_residues[c1] \
2725  and c2 in mapped_residues and r2 in mapped_residues[c2]:
2726  weight_extra_mapped += weight
2727  else:
2728  weight_extra_non_mapped += weight
2729  return weight_extra_mapped, weight_extra_mapped + weight_extra_non_mapped
2730 
2731 def _GetScores(contacts_1, contacts_2, mapped_residues, chain_mapping):
2732  """Get QS scores (see :class:`QSscorer`).
2733 
2734  Note that if some chains are not to be considered at all, they must be removed
2735  from *contacts_1* / *contacts_2* prior to calling this.
2736 
2737  :param contacts_1: See :func:`GetContacts` for first entity
2738  :param contacts_2: See :func:`GetContacts` for second entity
2739  :param mapped_residues: See :func:`_GetMappedResidues`
2740  :param chain_mapping: Maps any chain name in *mapped_residues* to chain name
2741  for *contacts_2*.
2742  :type chain_mapping: :class:`dict` (:class:`str` / :class:`str`)
2743  :return: Tuple (QS_best, QS_global) (see :attr:`QSscorer.best_score` and
2744  see :attr:`QSscorer.global_score`)
2745  """
2746  # keep track of considered contacts as set of (c1,c2,r1,r2) tuples
2747  done_1 = set()
2748  done_2 = set()
2749  weighted_scores = 0
2750  weight_sum = 0
2751  # naming cXY, rXY: X = 1/2 for contact, Y = 1/2/T for mapping (T = tmp)
2752  # -> d1 = contacts_1[c11][c21][r11][r21], d2 = contacts_2[c12][c22][r12][r22]
2753  for c11 in contacts_1:
2754  # map to other one
2755  if c11 not in mapped_residues: continue
2756  c1T = chain_mapping[c11]
2757  # second chain
2758  for c21 in contacts_1[c11]:
2759  # map to other one
2760  if c21 not in mapped_residues: continue
2761  c2T = chain_mapping[c21]
2762  # flip if needed
2763  flipped_chains = (c1T > c2T)
2764  if flipped_chains:
2765  c12, c22 = c2T, c1T
2766  else:
2767  c12, c22 = c1T, c2T
2768  # skip early if no contacts there
2769  if c12 not in contacts_2 or c22 not in contacts_2[c12]: continue
2770  # loop over res.num. in c11
2771  for r11 in contacts_1[c11][c21]:
2772  # map to other one and skip if no contacts there
2773  if r11 not in mapped_residues[c11]: continue
2774  r1T = mapped_residues[c11][r11]
2775  # loop over res.num. in c21
2776  for r21 in contacts_1[c11][c21][r11]:
2777  # map to other one and skip if no contacts there
2778  if r21 not in mapped_residues[c21]: continue
2779  r2T = mapped_residues[c21][r21]
2780  # flip if needed
2781  if flipped_chains:
2782  r12, r22 = r2T, r1T
2783  else:
2784  r12, r22 = r1T, r2T
2785  # skip early if no contacts there
2786  if r12 not in contacts_2[c12][c22]: continue
2787  if r22 not in contacts_2[c12][c22][r12]: continue
2788  # ok now we can finally do the scoring
2789  d1 = contacts_1[c11][c21][r11][r21]
2790  d2 = contacts_2[c12][c22][r12][r22]
2791  weight = _weight(min(d1, d2))
2792  weighted_scores += weight * (1 - abs(d1 - d2) / 12)
2793  weight_sum += weight
2794  # keep track of done ones
2795  done_1.add((c11, c21, r11, r21))
2796  done_2.add((c12, c22, r12, r22))
2797 
2798  LogVerbose("Shared contacts: %d" % len(done_1))
2799 
2800  # add the extra weights
2801  weights_extra_1 = _GetExtraWeights(contacts_1, done_1, mapped_residues)
2802  mapped_residues_2 = dict()
2803  for c1 in mapped_residues:
2804  c2 = chain_mapping[c1]
2805  mapped_residues_2[c2] = set()
2806  for r1 in mapped_residues[c1]:
2807  mapped_residues_2[c2].add(mapped_residues[c1][r1])
2808  weights_extra_2 = _GetExtraWeights(contacts_2, done_2, mapped_residues_2)
2809  weight_extra_mapped = weights_extra_1[0] + weights_extra_2[0]
2810  weight_extra_all = weights_extra_1[1] + weights_extra_2[1]
2811 
2812  # get scores
2813  denom_best = weight_sum + weight_extra_mapped
2814  denom_all = weight_sum + weight_extra_all
2815  if denom_best == 0:
2816  QS_best = 0
2817  else:
2818  QS_best = weighted_scores / denom_best
2819  if denom_all == 0:
2820  QS_global = 0
2821  else:
2822  QS_global = weighted_scores / denom_all
2823  return QS_best, QS_global
2824 
2825 def _weight(dist):
2826  """
2827  This weight expresses the probability of two residues to interact given the CB
2828  distance (from Xu et al. http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2573399/)
2829  """
2830  if dist <= 5.0:
2831  return 1
2832  x = (dist-5.0)/4.28;
2833  return np.exp(-2*x*x)
2834 
2835 
2836 def _GetQsSuperposition(alns):
2837  """
2838  :return: Superposition result based on shared CA atoms in *alns*
2839  (with views attached) (see :attr:`QSscorer.superposition`).
2840  :param alns: See :attr:`QSscorer.alignments`
2841  """
2842  # check input
2843  if not alns:
2844  raise QSscoreError('No successful alignments for superposition!')
2845  # prepare views
2846  view_1 = alns[0].GetSequence(0).attached_view.CreateEmptyView()
2847  view_2 = alns[0].GetSequence(1).attached_view.CreateEmptyView()
2848  # collect CA from alignments in proper order
2849  for aln in alns:
2850  for col in aln:
2851  res_1 = col.GetResidue(0)
2852  res_2 = col.GetResidue(1)
2853  if res_1.IsValid() and res_2.IsValid():
2854  ca_1 = res_1.FindAtom('CA')
2855  ca_2 = res_2.FindAtom('CA')
2856  if ca_1.IsValid() and ca_2.IsValid():
2857  view_1.AddAtom(ca_1)
2858  view_2.AddAtom(ca_2)
2859  # superpose them without chainging entities
2860  res = mol.alg.SuperposeSVD(view_1, view_2, apply_transform=False)
2861  return res
2862 
2863 
2864 def _AddResidue(edi, res, rnum, chain, calpha_only):
2865  """
2866  Add residue *res* with res. num. *run* to given *chain* using editor *edi*.
2867  Either all atoms added or (if *calpha_only*) only CA.
2868  """
2869  if calpha_only:
2870  ca_atom = res.FindAtom('CA')
2871  if ca_atom.IsValid():
2872  new_res = edi.AppendResidue(chain, res.name, rnum)
2873  edi.InsertAtom(new_res, ca_atom.name, ca_atom.pos)
2874  else:
2875  new_res = edi.AppendResidue(chain, res.name, rnum)
2876  for atom in res.atoms:
2877  edi.InsertAtom(new_res, atom.name, atom.pos)
2878 
2879 def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only, penalize_extra_chains):
2880  """
2881  Create two new entities (based on the alignments attached views) where all
2882  residues have same numbering (when they're aligned) and they are all pushed to
2883  a single chain X. Also append extra chains contained in *ent_1* and *ent_2*
2884  but not contained in *alns*.
2885 
2886  Used for :attr:`QSscorer.lddt_ref` and :attr:`QSscorer.lddt_mdl`
2887 
2888  :param alns: List of alignments with attached views (first sequence: *ent_1*,
2889  second: *ent_2*). Residue number in single chain is column index
2890  of current alignment + sum of lengths of all previous alignments
2891  (order of alignments as in input list).
2892  :type alns: See :attr:`QSscorer.alignments`
2893  :param ent_1: First entity to process.
2894  :type ent_1: :class:`~ost.mol.EntityHandle`
2895  :param ent_2: Second entity to process.
2896  :type ent_2: :class:`~ost.mol.EntityHandle`
2897  :param calpha_only: If True, we only include CA atoms instead of all.
2898  :type calpha_only: :class:`bool`
2899  :param penalize_extra_chains: If True, extra chains are added to model and
2900  reference. Otherwise, only mapped ones.
2901  :type penalize_extra_chains: :class:`bool`
2902 
2903  :return: Tuple of two single chain entities (from *ent_1* and from *ent_2*)
2904  :rtype: :class:`tuple` of :class:`~ost.mol.EntityHandle`
2905  """
2906  # first new entity
2907  ent_ren_1 = mol.CreateEntity()
2908  ed_1 = ent_ren_1.EditXCS()
2909  new_chain_1 = ed_1.InsertChain('X')
2910  # second one
2911  ent_ren_2 = mol.CreateEntity()
2912  ed_2 = ent_ren_2.EditXCS()
2913  new_chain_2 = ed_2.InsertChain('X')
2914  # the alignment already contains sorted chains
2915  rnum = 0
2916  chain_done_1 = set()
2917  chain_done_2 = set()
2918  for aln in alns:
2919  chain_done_1.add(aln.GetSequence(0).name)
2920  chain_done_2.add(aln.GetSequence(1).name)
2921  for col in aln:
2922  rnum += 1
2923  # add valid residues to single chain entities
2924  res_1 = col.GetResidue(0)
2925  if res_1.IsValid():
2926  _AddResidue(ed_1, res_1, rnum, new_chain_1, calpha_only)
2927  res_2 = col.GetResidue(1)
2928  if res_2.IsValid():
2929  _AddResidue(ed_2, res_2, rnum, new_chain_2, calpha_only)
2930  # extra chains?
2931  if penalize_extra_chains:
2932  for chain in ent_1.chains:
2933  if chain.name in chain_done_1:
2934  continue
2935  for res in chain.residues:
2936  rnum += 1
2937  _AddResidue(ed_1, res, rnum, new_chain_1, calpha_only)
2938  for chain in ent_2.chains:
2939  if chain.name in chain_done_2:
2940  continue
2941  for res in chain.residues:
2942  rnum += 1
2943  _AddResidue(ed_2, res, rnum, new_chain_2, calpha_only)
2944  # get entity names
2945  ent_ren_1.SetName(aln.GetSequence(0).GetAttachedView().GetName())
2946  ent_ren_2.SetName(aln.GetSequence(1).GetAttachedView().GetName())
2947  # connect atoms
2948  if not conop.GetDefaultLib():
2949  raise RuntimeError("QSscore computation requires a compound library!")
2950  pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
2951  pr.Process(ent_ren_1)
2952  ed_1.UpdateICS()
2953  pr.Process(ent_ren_2)
2954  ed_2.UpdateICS()
2955  return ent_ren_1, ent_ren_2
2956 
2957 
2958 # specify public interface
2959 __all__ = ('QSscoreError', 'QSscorer', 'QSscoreEntity', 'FilterContacts',
2960  'GetContacts', 'OligoLDDTScorer', 'MappedLDDTScorer')
def _ComputeAlignedEntities
Class internal helpers (anything that doesnt easily work without this class)
Definition: qsscoring.py:544
def _GetSuperposeData
Class internal helpers (anything that doesnt easily work without this class)
Definition: qsscoring.py:818
Mat4 DLLEXPORT_OST_GEOM Invert(const Mat4 &m)
def _InitScorer
Class internal helpers (anything that doesnt easily work without this class)
Definition: qsscoring.py:1406
Real Distance(const Vec3 &p1, const Vec3 &p2)
returns the distance between two points
Definition: vecmat3_op.hh:199
Three dimensional vector class, using Real precision.
Definition: vec3.hh:42
def _PrepareOligoEntities
Class internal helpers.
Definition: qsscoring.py:1208